Support FP32 requantization in AVX2 QS8 microkernels

PiperOrigin-RevId: 375822588
diff --git a/BUILD.bazel b/BUILD.bazel
index bd027df..64d6fc9 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -3359,26 +3359,44 @@
     "src/math/sigmoid-avx2-rr2-p5-nr1fma.c",
     "src/math/sigmoid-avx2-rr2-p5-nr2fma.c",
     "src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c",
+    "src/qs8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c",
     "src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c",
+    "src/qs8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c",
     "src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c",
+    "src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul16.c",
     "src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c",
+    "src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul32.c",
     "src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c",
+    "src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul16.c",
     "src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c",
+    "src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul32.c",
     "src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c",
+    "src/qs8-dwconv/gen/up24x9-minmax-fp32-avx2-mul32.c",
     "src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c",
+    "src/qs8-dwconv/gen/up24x25-minmax-fp32-avx2-mul32.c",
     "src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c",
+    "src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul16.c",
     "src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c",
+    "src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul32.c",
     "src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c",
+    "src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul16.c",
     "src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c",
+    "src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul32.c",
     "src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c",
+    "src/qs8-gemm/gen/1x8c8-minmax-fp32-avx2.c",
     "src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c",
     "src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c",
+    "src/qs8-gemm/gen/2x8c8-minmax-fp32-avx2.c",
     "src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c",
     "src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c",
+    "src/qs8-gemm/gen/3x8c8-minmax-fp32-avx2.c",
     "src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c",
     "src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c",
+    "src/qs8-igemm/gen/1x8c8-minmax-fp32-avx2.c",
     "src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c",
+    "src/qs8-igemm/gen/2x8c8-minmax-fp32-avx2.c",
     "src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c",
+    "src/qs8-igemm/gen/3x8c8-minmax-fp32-avx2.c",
     "src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x8.c",
     "src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x16.c",
     "src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x24.c",
@@ -6904,6 +6922,16 @@
 )
 
 xnnpack_unit_test(
+    name = "qs8_dwconv_minmax_fp32_test",
+    srcs = [
+        "test/qs8-dwconv-minmax-fp32.cc",
+        "test/dwconv-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS + [":packing"],
+)
+
+xnnpack_unit_test(
     name = "qs8_gavgpool_minmax_test",
     srcs = [
         "test/qs8-gavgpool-minmax.cc",
@@ -6925,6 +6953,17 @@
 )
 
 xnnpack_unit_test(
+    name = "qs8_gemm_minmax_fp32_test",
+    timeout = "moderate",
+    srcs = [
+        "test/qs8-gemm-minmax-fp32.cc",
+        "test/gemm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS + [":packing"],
+)
+
+xnnpack_unit_test(
     name = "qs8_igemm_minmax_gemmlowp_test",
     timeout = "moderate",
     srcs = [
@@ -6936,6 +6975,17 @@
 )
 
 xnnpack_unit_test(
+    name = "qs8_igemm_minmax_fp32_test",
+    timeout = "moderate",
+    srcs = [
+        "test/qs8-igemm-minmax-fp32.cc",
+        "test/gemm-microkernel-tester.h",
+        "src/xnnpack/AlignedAllocator.h",
+    ] + WEIGHTS_PACK_HDRS + MICROKERNEL_TEST_HDRS,
+    deps = MICROKERNEL_TEST_DEPS + [":packing"],
+)
+
+xnnpack_unit_test(
     name = "qs8_requantization_test",
     srcs = [
         "src/xnnpack/requantization-stubs.h",
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3dc890e..f9840bb 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2593,26 +2593,44 @@
   src/math/sigmoid-avx2-rr2-p5-nr1fma.c
   src/math/sigmoid-avx2-rr2-p5-nr2fma.c
   src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
+  src/qs8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c
   src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c
+  src/qs8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c
   src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c
+  src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul16.c
   src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c
+  src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul32.c
   src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c
+  src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul16.c
   src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c
+  src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul32.c
   src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c
+  src/qs8-dwconv/gen/up24x9-minmax-fp32-avx2-mul32.c
   src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c
+  src/qs8-dwconv/gen/up24x25-minmax-fp32-avx2-mul32.c
   src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c
+  src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul16.c
   src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c
+  src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul32.c
   src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c
+  src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul16.c
   src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c
+  src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul32.c
   src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
+  src/qs8-gemm/gen/1x8c8-minmax-fp32-avx2.c
   src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
   src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
+  src/qs8-gemm/gen/2x8c8-minmax-fp32-avx2.c
   src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
   src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+  src/qs8-gemm/gen/3x8c8-minmax-fp32-avx2.c
   src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
   src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c
+  src/qs8-igemm/gen/1x8c8-minmax-fp32-avx2.c
   src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c
+  src/qs8-igemm/gen/2x8c8-minmax-fp32-avx2.c
   src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+  src/qs8-igemm/gen/3x8c8-minmax-fp32-avx2.c
   src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x8.c
   src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x16.c
   src/qs8-vadd/gen/minmax-avx2-mul32-ld64-x24.c
@@ -4610,6 +4628,15 @@
   TARGET_LINK_LIBRARIES(qs8-dwconv-minmax-gemmlowp-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
   ADD_TEST(qs8-dwconv-minmax-gemmlowp-test qs8-dwconv-minmax-gemmlowp-test)
 
+  ADD_EXECUTABLE(qs8-dwconv-minmax-fp32-test test/qs8-dwconv-minmax-fp32.cc)
+  SET_TARGET_PROPERTIES(qs8-dwconv-minmax-fp32-test PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED YES
+    CXX_EXTENSIONS YES)
+  TARGET_INCLUDE_DIRECTORIES(qs8-dwconv-minmax-fp32-test PRIVATE src test)
+  TARGET_LINK_LIBRARIES(qs8-dwconv-minmax-fp32-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+  ADD_TEST(qs8-dwconv-minmax-fp32-test qs8-dwconv-minmax-fp32-test)
+
   ADD_EXECUTABLE(qs8-gavgpool-minmax-test test/qs8-gavgpool-minmax.cc)
   SET_TARGET_PROPERTIES(qs8-gavgpool-minmax-test PROPERTIES
     CXX_STANDARD 11
@@ -4628,6 +4655,15 @@
   TARGET_LINK_LIBRARIES(qs8-gemm-minmax-gemmlowp-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
   ADD_TEST(qs8-gemm-minmax-gemmlowp-test qs8-gemm-minmax-gemmlowp-test)
 
+  ADD_EXECUTABLE(qs8-gemm-minmax-fp32-test test/qs8-gemm-minmax-fp32.cc)
+  SET_TARGET_PROPERTIES(qs8-gemm-minmax-fp32-test PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED YES
+    CXX_EXTENSIONS YES)
+  TARGET_INCLUDE_DIRECTORIES(qs8-gemm-minmax-fp32-test PRIVATE src test)
+  TARGET_LINK_LIBRARIES(qs8-gemm-minmax-fp32-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+  ADD_TEST(qs8-gemm-minmax-fp32-test qs8-gemm-minmax-fp32-test)
+
   ADD_EXECUTABLE(qs8-igemm-minmax-gemmlowp-test test/qs8-igemm-minmax-gemmlowp.cc)
   SET_TARGET_PROPERTIES(qs8-igemm-minmax-gemmlowp-test PROPERTIES
     CXX_STANDARD 11
@@ -4637,6 +4673,15 @@
   TARGET_LINK_LIBRARIES(qs8-igemm-minmax-gemmlowp-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
   ADD_TEST(qs8-igemm-minmax-gemmlowp-test qs8-igemm-minmax-gemmlowp-test)
 
+  ADD_EXECUTABLE(qs8-igemm-minmax-fp32-test test/qs8-igemm-minmax-fp32.cc)
+  SET_TARGET_PROPERTIES(qs8-igemm-minmax-fp32-test PROPERTIES
+    CXX_STANDARD 11
+    CXX_STANDARD_REQUIRED YES
+    CXX_EXTENSIONS YES)
+  TARGET_INCLUDE_DIRECTORIES(qs8-igemm-minmax-fp32-test PRIVATE src test)
+  TARGET_LINK_LIBRARIES(qs8-igemm-minmax-fp32-test PRIVATE XNNPACK cpuinfo fp16 gtest gtest_main)
+  ADD_TEST(qs8-igemm-minmax-fp32-test qs8-igemm-minmax-fp32-test)
+
   ADD_EXECUTABLE(qs8-vadd-minmax-test test/qs8-vadd-minmax.cc)
   SET_TARGET_PROPERTIES(qs8-vadd-minmax-test PROPERTIES
     CXX_STANDARD 11
diff --git a/bench/qs8-gemm.cc b/bench/qs8-gemm.cc
index 1f3515b..4f1e5df 100644
--- a/bench/qs8-gemm.cc
+++ b/bench/qs8-gemm.cc
@@ -214,307 +214,307 @@
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
   static void qs8_gemm_1x8__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, 1, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, 2, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, 3, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, 4, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_6x8__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, 6, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, 1, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, 2, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, 3, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, 4, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_6x16__neon_mlal_lane(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, 6, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, 1, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, 2, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, 3, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, 4, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_6x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, 6, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, 1, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, 2, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, 3, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, 4, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_6x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, 6, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, 1, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, 2, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, 3, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8__neon_mull_addw_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, 4, 8, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, 1, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, 2, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, 3, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16__neon_mull_addw_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, 4, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, 1, 8, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, 2, 8, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, 3, 8, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, 4, 8, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, 1, 16, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, 2, 16, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, 3, 16, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c2__neon_mull_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, 4, 16, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, 1, 8, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, 2, 8, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, 3, 8, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, 4, 8, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, 1, 16, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, 2, 16, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, 3, 16, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c2__neon_mlal_padal_dup(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, 4, 16, 2, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__neon_mull_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, 1, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__neon_mull_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, 2, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c8__neon_mull_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, 3, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c8__neon_mull_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, 4, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c8__neon_mull_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, 1, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c8__neon_mull_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, 2, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c8__neon_mull_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, 3, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c8__neon_mull_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, 4, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, 1, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, 2, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c8__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, 3, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c8__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, 4, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c8__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, 1, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c8__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, 2, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c8__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, 3, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c8__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, 4, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c16__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, 1, 8, 16, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c16__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, 2, 8, 16, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x8c16__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, 3, 8, 16, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x8c16__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, 4, 8, 16, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x16c16__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, 1, 16, 16, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x16c16__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, 2, 16, 16, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_3x16c16__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, 3, 16, 16, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16c16__neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, 4, 16, 16, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c4__neondot(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, 1, 8, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x8c4__neondot(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, 4, 8, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_6x8c4__neondot(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, 6, 8, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_8x8c4__neondot(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, 8, 8, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_1x16c4__neondot(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, 1, 16, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x16c4__neondot(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, 4, 16, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_6x16c4__neondot(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, 6, 16, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_8x16c4__neondot(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, 8, 16, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
 
   BENCHMARK_GEMM(qs8_gemm_1x8__neon_mlal_lane)
@@ -599,71 +599,71 @@
 #if XNN_ARCH_ARM64
   static void qs8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, 4, 16, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_1x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, 1, 16, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_1x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, 1, 16, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, 4, 16, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, 4, 16, 4, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEONDOT);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEONDOT);
   }
   static void qs8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, 4, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, 4, 16, 1, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__aarch64_neon_mlal_padal_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, 1, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__aarch64_neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, 1, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__aarch64_neon_mlal_padal_cortex_a53(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, 1, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, 1, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mull_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, 2, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, 2, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mlal_padal_prfm(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, 2, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mlal_padal_cortex_a53(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, 2, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, 2, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
   static void qs8_gemm_2x8c16__aarch64_neon_mlal_padal(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, 2, 8, 16, 1,
-      xnn_init_qs8_conv_minmax_neon_params, benchmark::utils::CheckNEON);
+      xnn_init_qs8_conv_minmax_gemmlowp_neon_params, benchmark::utils::CheckNEON);
   }
 
   BENCHMARK_GEMM(qs8_gemm_1x16c4__aarch64_neondot_ld32)
@@ -689,363 +689,363 @@
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
   static void qs8_gemm_2x16c8__avx512skx(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, 2, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX512SKX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX512SKX);
   }
   static void qs8_gemm_3x16c8__avx512skx(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, 3, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX512SKX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX512SKX);
   }
   static void qs8_gemm_4x16c8__avx512skx(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, 4, 16, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX512SKX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX512SKX);
   }
 
   static void qs8_gemm_2x8c8__avx2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, 2, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_avx2_params, benchmark::utils::CheckAVX2);
+      xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, benchmark::utils::CheckAVX2);
   }
   static void qs8_gemm_3x8c8__avx2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, 3, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_avx2_params, benchmark::utils::CheckAVX2);
+      xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, benchmark::utils::CheckAVX2);
   }
 
   static void qs8_gemm_xw_2x8c8__avx2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, 2, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_avx2_params, benchmark::utils::CheckAVX2, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, benchmark::utils::CheckAVX2, true);
   }
   static void qs8_gemm_xw_3x8c8__avx2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, 3, 8, 8, 1,
-      xnn_init_qs8_conv_minmax_avx2_params, benchmark::utils::CheckAVX2, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, benchmark::utils::CheckAVX2, true);
   }
 
   static void qs8_gemm_2x4c2__xop_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_3x4c2__xop_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_4x4c2__xop_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
 
   static void qs8_gemm_2x4c2__xop_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_3x4c2__xop_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_4x4c2__xop_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
 
   static void qs8_gemm_xw_2x4c2__xop(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__xop, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP, true);
   }
   static void qs8_gemm_xw_3x4c2__xop(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__xop, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP, true);
   }
   static void qs8_gemm_xw_4x4c2__xop(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP, true);
   }
 
   static void qs8_gemm_2x4c8__xop_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_3x4c8__xop_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
 
   static void qs8_gemm_2x4c8__xop_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
   static void qs8_gemm_3x4c8__xop_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP);
   }
 
   static void qs8_gemm_xw_2x4c8__xop(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP, true);
   }
   static void qs8_gemm_xw_3x4c8__xop(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckXOP, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckXOP, true);
   }
 
   static void qs8_gemm_2x4c2__avx_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_3x4c2__avx_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_4x4c2__avx_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
 
   static void qs8_gemm_2x4c2__avx_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_3x4c2__avx_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_4x4c2__avx_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
 
   static void qs8_gemm_xw_2x4c2__avx(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__avx, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX, true);
   }
   static void qs8_gemm_xw_3x4c2__avx(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__avx, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX, true);
   }
   static void qs8_gemm_xw_4x4c2__avx(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX, true);
   }
 
   static void qs8_gemm_2x4c8__avx_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_3x4c8__avx_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
 
   static void qs8_gemm_2x4c8__avx_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
   static void qs8_gemm_3x4c8__avx_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX);
   }
 
   static void qs8_gemm_xw_2x4c8__avx(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX, true);
   }
   static void qs8_gemm_xw_3x4c8__avx(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckAVX, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckAVX, true);
   }
 
   static void qs8_gemm_2x4c2__sse41_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_3x4c2__sse41_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_4x4c2__sse41_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
 
   static void qs8_gemm_2x4c2__sse41_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_3x4c2__sse41_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_4x4c2__sse41_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
 
   static void qs8_gemm_xw_2x4c2__sse41(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse41, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41, true);
   }
   static void qs8_gemm_xw_3x4c2__sse41(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse41, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41, true);
   }
   static void qs8_gemm_xw_4x4c2__sse41(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41, true);
   }
 
   static void qs8_gemm_2x4c8__sse41_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_3x4c8__sse41_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
 
   static void qs8_gemm_2x4c8__sse41_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
   static void qs8_gemm_3x4c8__sse41_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41);
   }
 
   static void qs8_gemm_xw_2x4c8__sse41(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41, true);
   }
   static void qs8_gemm_xw_3x4c8__sse41(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse4_params, benchmark::utils::CheckSSE41, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, benchmark::utils::CheckSSE41, true);
   }
 
   static void qs8_gemm_2x4c2__ssse3_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_3x4c2__ssse3_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_4x4c2__ssse3_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
 
   static void qs8_gemm_2x4c2__ssse3_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_3x4c2__ssse3_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_4x4c2__ssse3_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
 
   static void qs8_gemm_xw_2x4c2__ssse3(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__ssse3, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
   static void qs8_gemm_xw_3x4c2__ssse3(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__ssse3, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
   static void qs8_gemm_xw_4x4c2__ssse3(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
 
   static void qs8_gemm_2x4c8__ssse3_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_3x4c8__ssse3_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
 
   static void qs8_gemm_2x4c8__ssse3_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
   static void qs8_gemm_3x4c8__ssse3_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3);
   }
 
   static void qs8_gemm_xw_2x4c8__ssse3(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
   static void qs8_gemm_xw_3x4c8__ssse3(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, benchmark::utils::CheckSSSE3, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, benchmark::utils::CheckSSSE3, true);
   }
 
   static void qs8_gemm_2x4c2__sse2_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
   static void qs8_gemm_3x4c2__sse2_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
   static void qs8_gemm_4x4c2__sse2_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
 
   static void qs8_gemm_2x4c2__sse2_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
   static void qs8_gemm_3x4c2__sse2_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
   static void qs8_gemm_4x4c2__sse2_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
 
   static void qs8_gemm_xw_2x4c2__sse2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse2, 2, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, nullptr, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, nullptr, true);
   }
   static void qs8_gemm_xw_3x4c2__sse2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse2, 3, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, nullptr, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, nullptr, true);
   }
   static void qs8_gemm_xw_4x4c2__sse2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, 4, 4, 2, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, nullptr, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, nullptr, true);
   }
 
   static void qs8_gemm_2x4c8__sse2_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
   static void qs8_gemm_3x4c8__sse2_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
 
   static void qs8_gemm_2x4c8__sse2_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
   static void qs8_gemm_3x4c8__sse2_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params);
   }
 
   static void qs8_gemm_xw_2x4c8__sse2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, nullptr, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, nullptr, true);
   }
   static void qs8_gemm_xw_3x4c8__sse2(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_sse2_params, nullptr, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, nullptr, true);
   }
 
   BENCHMARK_GEMM(qs8_gemm_2x16c8__avx512skx)
@@ -1142,29 +1142,29 @@
 #if XNN_ARCH_WASMSIMD
   static void qs8_gemm_2x4c8__wasmsimd_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_wasmsimd_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params);
   }
   static void qs8_gemm_3x4c8__wasmsimd_ld64(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_wasmsimd_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params);
   }
 
   static void qs8_gemm_2x4c8__wasmsimd_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_wasmsimd_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params);
   }
   static void qs8_gemm_3x4c8__wasmsimd_ld128(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_wasmsimd_params);
+      xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params);
   }
 
   static void qs8_gemm_xw_2x4c8__wasmsimd(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, 2, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_wasmsimd_params, nullptr, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, nullptr, true);
   }
   static void qs8_gemm_xw_3x4c8__wasmsimd(benchmark::State& state, const char* net) {
     GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, 3, 4, 8, 1,
-      xnn_init_qs8_conv_minmax_wasmsimd_params, nullptr, true);
+      xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, nullptr, true);
   }
 
   BENCHMARK_GEMM(qs8_gemm_2x4c8__wasmsimd_ld64)
@@ -1178,28 +1178,28 @@
 
 static void qs8_gemm_2x2__scalar(benchmark::State& state, const char* net) {
   GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, 2, 2, 1, 1,
-    xnn_init_qs8_conv_minmax_scalar_params);
+    xnn_init_qs8_conv_minmax_gemmlowp_scalar_params);
 }
 static void qs8_gemm_3x2__scalar(benchmark::State& state, const char* net) {
   GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, 3, 2, 1, 1,
-    xnn_init_qs8_conv_minmax_scalar_params);
+    xnn_init_qs8_conv_minmax_gemmlowp_scalar_params);
 }
 static void qs8_gemm_4x2__scalar(benchmark::State& state, const char* net) {
   GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, 4, 2, 1, 1,
-    xnn_init_qs8_conv_minmax_scalar_params);
+    xnn_init_qs8_conv_minmax_gemmlowp_scalar_params);
 }
 
 static void qs8_gemm_2x4__scalar(benchmark::State& state, const char* net) {
   GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, 2, 4, 1, 1,
-    xnn_init_qs8_conv_minmax_scalar_params);
+    xnn_init_qs8_conv_minmax_gemmlowp_scalar_params);
 }
 static void qs8_gemm_3x4__scalar(benchmark::State& state, const char* net) {
   GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, 3, 4, 1, 1,
-    xnn_init_qs8_conv_minmax_scalar_params);
+    xnn_init_qs8_conv_minmax_gemmlowp_scalar_params);
 }
 static void qs8_gemm_4x4__scalar(benchmark::State& state, const char* net) {
   GEMMBenchmark(state, xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, 4, 4, 1, 1,
-    xnn_init_qs8_conv_minmax_scalar_params);
+    xnn_init_qs8_conv_minmax_gemmlowp_scalar_params);
 }
 
 BENCHMARK_GEMM(qs8_gemm_2x2__scalar)
diff --git a/scripts/generate-qs8-dwconv.sh b/scripts/generate-qs8-dwconv.sh
index 9574396..64fb185 100755
--- a/scripts/generate-qs8-dwconv.sh
+++ b/scripts/generate-qs8-dwconv.sh
@@ -91,21 +91,37 @@
 tools/xngen src/qs8-dwconv/unipass-sse-mul32.c.in -D CHANNEL_TILE=24 -D KERNEL_TILE=25 -D SSE=4 -D AVX=1 -D XOP=1 -o src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-xop-mul32.c
 
 ################################### x86 AVX2 ##################################
-tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=9 -o src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c
-tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=9 -o src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=9 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=9 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c
 
-tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=25 -o src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c
-tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=25 -o src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=9 -D REQUANTIZATION=FP32     -o src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul16.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=9 -D REQUANTIZATION=FP32     -o src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul16.c
 
-tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=8  -D KERNEL_TILE=9 -o src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
-tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=9 -o src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c
-tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=24 -D KERNEL_TILE=9 -o src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c
-tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=9 -o src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=25 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=25 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c
 
-tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=8  -D KERNEL_TILE=25 -o src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c
-tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=25 -o src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c
-tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=24 -D KERNEL_TILE=25 -o src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c
-tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=25 -o src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=25 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul16.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul16.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=25 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul16.c
+
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=8  -D KERNEL_TILE=9 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=9 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=24 -D KERNEL_TILE=9 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=9 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c
+
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=8  -D KERNEL_TILE=9 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=9 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=24 -D KERNEL_TILE=9 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up24x9-minmax-fp32-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=9 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul32.c
+
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=8  -D KERNEL_TILE=25 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=25 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=24 -D KERNEL_TILE=25 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=25 -D REQUANTIZATION=GEMMLOWP -o src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c
+
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=8  -D KERNEL_TILE=25 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=25 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=24 -D KERNEL_TILE=25 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up24x25-minmax-fp32-avx2-mul32.c
+tools/xngen src/qs8-dwconv/unipass-avx2-mul32.c.in -D CHANNEL_TILE=32 -D KERNEL_TILE=25 -D REQUANTIZATION=FP32 -o src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul32.c
 
 ################################## x86 AVX512 #################################
 tools/xngen src/qs8-dwconv/unipass-avx512skx-mul32.c.in -D CHANNEL_TILE=16 -D KERNEL_TILE=9 -o src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx512skx-mul32.c
@@ -116,3 +132,4 @@
 
 ################################## Unit tests #################################
 tools/generate-dwconv-test.py --spec test/qs8-dwconv-minmax-gemmlowp.yaml --output test/qs8-dwconv-minmax-gemmlowp.cc
+tools/generate-dwconv-test.py --spec test/qs8-dwconv-minmax-fp32.yaml --output test/qs8-dwconv-minmax-fp32.cc
diff --git a/scripts/generate-qs8-gemm.sh b/scripts/generate-qs8-gemm.sh
index 8665494..49b164c 100755
--- a/scripts/generate-qs8-gemm.sh
+++ b/scripts/generate-qs8-gemm.sh
@@ -277,13 +277,17 @@
 
 ################################### x86 AVX2 ##################################
 ### C8 micro-kernels
-tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=1 -D VARIANT=LD128    -o src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
-tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=2 -D VARIANT=LD128    -o src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
-tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=3 -D VARIANT=LD128    -o src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=1 -D VARIANT=LD128    -D REQUANTIZATION=GEMMLOWP -o src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
+tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=2 -D VARIANT=LD128    -D REQUANTIZATION=GEMMLOWP -o src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
+tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=3 -D VARIANT=LD128    -D REQUANTIZATION=GEMMLOWP -o src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
 
-tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=1 -D VARIANT=EXTENDED -o src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
-tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=2 -D VARIANT=EXTENDED -o src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
-tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=3 -D VARIANT=EXTENDED -o src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
+tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=1 -D VARIANT=LD128    -D REQUANTIZATION=FP32     -o src/qs8-gemm/gen/1x8c8-minmax-fp32-avx2.c
+tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=2 -D VARIANT=LD128    -D REQUANTIZATION=FP32     -o src/qs8-gemm/gen/2x8c8-minmax-fp32-avx2.c
+tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=3 -D VARIANT=LD128    -D REQUANTIZATION=FP32     -o src/qs8-gemm/gen/3x8c8-minmax-fp32-avx2.c
+
+tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=1 -D VARIANT=EXTENDED -D REQUANTIZATION=GEMMLOWP -o src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
+tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=2 -D VARIANT=EXTENDED -D REQUANTIZATION=GEMMLOWP -o src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
+tools/xngen src/qs8-gemm/MRx8c8-avx2.c.in -D MR=3 -D VARIANT=EXTENDED -D REQUANTIZATION=GEMMLOWP -o src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
 
 ################################## x86 AVX512 #################################
 ### C8 micro-kernels
@@ -294,3 +298,4 @@
 
 ################################## Unit tests #################################
 tools/generate-gemm-test.py --spec test/qs8-gemm-minmax-gemmlowp.yaml --output test/qs8-gemm-minmax-gemmlowp.cc
+tools/generate-gemm-test.py --spec test/qs8-gemm-minmax-fp32.yaml --output test/qs8-gemm-minmax-fp32.cc
diff --git a/scripts/generate-qs8-igemm.sh b/scripts/generate-qs8-igemm.sh
index a8fdfaa..44598f0 100755
--- a/scripts/generate-qs8-igemm.sh
+++ b/scripts/generate-qs8-igemm.sh
@@ -228,9 +228,13 @@
 
 ################################### x86 AVX2 ##################################
 ### C8 micro-kernels
-tools/xngen src/qs8-igemm/MRx8c8-avx2.c.in -D MR=1 -o src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c
-tools/xngen src/qs8-igemm/MRx8c8-avx2.c.in -D MR=2 -o src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c
-tools/xngen src/qs8-igemm/MRx8c8-avx2.c.in -D MR=3 -o src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+tools/xngen src/qs8-igemm/MRx8c8-avx2.c.in -D MR=1 -D REQUANTIZATION=GEMMLOWP -o src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c
+tools/xngen src/qs8-igemm/MRx8c8-avx2.c.in -D MR=2 -D REQUANTIZATION=GEMMLOWP -o src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c
+tools/xngen src/qs8-igemm/MRx8c8-avx2.c.in -D MR=3 -D REQUANTIZATION=GEMMLOWP -o src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+
+tools/xngen src/qs8-igemm/MRx8c8-avx2.c.in -D MR=1 -D REQUANTIZATION=FP32     -o src/qs8-igemm/gen/1x8c8-minmax-fp32-avx2.c
+tools/xngen src/qs8-igemm/MRx8c8-avx2.c.in -D MR=2 -D REQUANTIZATION=FP32     -o src/qs8-igemm/gen/2x8c8-minmax-fp32-avx2.c
+tools/xngen src/qs8-igemm/MRx8c8-avx2.c.in -D MR=3 -D REQUANTIZATION=FP32     -o src/qs8-igemm/gen/3x8c8-minmax-fp32-avx2.c
 
 ################################## x86 AVX512 #################################
 ### C8 micro-kernels
@@ -241,3 +245,4 @@
 
 ################################## Unit tests #################################
 tools/generate-gemm-test.py --spec test/qs8-igemm-minmax-gemmlowp.yaml --output test/qs8-igemm-minmax-gemmlowp.cc
+tools/generate-gemm-test.py --spec test/qs8-igemm-minmax-fp32.yaml --output test/qs8-igemm-minmax-fp32.cc
diff --git a/src/init.c b/src/init.c
index a422804..a95df77 100644
--- a/src/init.c
+++ b/src/init.c
@@ -114,7 +114,7 @@
         xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot);
         xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot);
         xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot);
-        xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+        xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
         xnn_params.qs8.gemm.mr = 4;
         xnn_params.qs8.gemm.nr = 8;
         xnn_params.qs8.gemm.log2_kr = 2;
@@ -123,18 +123,18 @@
         xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
         xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
         xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
-        xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+        xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
         xnn_params.qs8.gemm.mr = 2;
         xnn_params.qs8.gemm.nr = 8;
         xnn_params.qs8.gemm.log2_kr = 1;
       }
 
       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16;
-      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
       xnn_params.qs8.dwconv[0].channel_tile = 8;
       xnn_params.qs8.dwconv[0].primary_tile = 9;
       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16;
-      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
       xnn_params.qs8.dwconv[1].channel_tile = 8;
       xnn_params.qs8.dwconv[1].primary_tile = 25;
 
@@ -845,7 +845,7 @@
           xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
           xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64);
           xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
-          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
           xnn_params.qs8.gemm.mr = 4;
           xnn_params.qs8.gemm.nr = 16;
           xnn_params.qs8.gemm.log2_kr = 2;
@@ -854,7 +854,7 @@
           xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal);
           xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal);
           xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal);
-          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
           xnn_params.qs8.gemm.mr = 2;
           xnn_params.qs8.gemm.nr = 8;
           xnn_params.qs8.gemm.log2_kr = 3;
@@ -865,7 +865,7 @@
           xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
           xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot);
           xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
-          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
           xnn_params.qs8.gemm.mr = 4;
           xnn_params.qs8.gemm.nr = 16;
           xnn_params.qs8.gemm.log2_kr = 2;
@@ -874,7 +874,7 @@
           xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
           xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
           xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
-          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
           xnn_params.qs8.gemm.mr = 2;
           xnn_params.qs8.gemm.nr = 8;
           xnn_params.qs8.gemm.log2_kr = 1;
@@ -895,7 +895,7 @@
           }
           xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
           xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
-          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
           xnn_params.qs8.gemm.mr = 4;
           xnn_params.qs8.gemm.nr = 16;
           xnn_params.qs8.gemm.log2_kr = 2;
@@ -907,7 +907,7 @@
               xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53);
               xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane);
               xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane);
-              xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+              xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
               xnn_params.qs8.gemm.mr = 4;
               xnn_params.qs8.gemm.nr = 16;
               break;
@@ -919,7 +919,7 @@
               xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm);
               xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm);
               xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm);
-              xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+              xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
               xnn_params.qs8.gemm.mr = 2;
               xnn_params.qs8.gemm.nr = 8;
               xnn_params.qs8.gemm.log2_kr = 3;
@@ -930,7 +930,7 @@
               xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal);
               xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal);
               xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal);
-              xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+              xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
               xnn_params.qs8.gemm.mr = 2;
               xnn_params.qs8.gemm.nr = 8;
               xnn_params.qs8.gemm.log2_kr = 3;
@@ -981,7 +981,7 @@
           xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
           xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot);
           xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot);
-          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
           xnn_params.qs8.gemm.mr = 4;
           xnn_params.qs8.gemm.nr = 16;
           xnn_params.qs8.gemm.log2_kr = 2;
@@ -990,7 +990,7 @@
           xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup);
           xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
           xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup);
-          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+          xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
           xnn_params.qs8.gemm.mr = 2;
           xnn_params.qs8.gemm.nr = 8;
           xnn_params.qs8.gemm.log2_kr = 1;
@@ -999,11 +999,11 @@
     #endif  // XNN_PLATFORM_IOS || XNN_PLATFORM_MAC
 
     xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16;
-    xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+    xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
     xnn_params.qs8.dwconv[0].channel_tile = 8;
     xnn_params.qs8.dwconv[0].primary_tile = 9;
     xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16;
-    xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_neon_params;
+    xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_neon_params;
     xnn_params.qs8.dwconv[1].channel_tile = 8;
     xnn_params.qs8.dwconv[1].primary_tile = 25;
 
@@ -1564,7 +1564,7 @@
       xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx);
       xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx);
       xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx);
-      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.gemm.mr = 4;
       xnn_params.qs8.gemm.nr = 16;
       xnn_params.qs8.gemm.log2_kr = 3;
@@ -1574,16 +1574,16 @@
       xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64);
       xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64);
       xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64);
-      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.gemm.mr = 2;
       xnn_params.qs8.gemm.nr = 4;
       xnn_params.qs8.gemm.log2_kr = 3;
     } else if (cpuinfo_has_x86_avx2()) {
-      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2);
-      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2);
-      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2);
-      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2);
-      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_avx2_params;
+      xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2);
+      xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2);
+      xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2);
+      xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2);
+      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx2_params;
       xnn_params.qs8.gemm.mr = 3;
       xnn_params.qs8.gemm.nr = 8;
       xnn_params.qs8.gemm.log2_kr = 3;
@@ -1592,7 +1592,7 @@
       xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128);
       xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128);
       xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128);
-      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.gemm.mr = 2;
       xnn_params.qs8.gemm.nr = 4;
       xnn_params.qs8.gemm.log2_kr = 3;
@@ -1601,7 +1601,7 @@
       xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64);
       xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64);
       xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64);
-      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.gemm.mr = 3;
       xnn_params.qs8.gemm.nr = 4;
       xnn_params.qs8.gemm.log2_kr = 3;
@@ -1610,7 +1610,7 @@
       xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64);
       xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64);
       xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64);
-      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_sse2_params;
+      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse2_params;
       xnn_params.qs8.gemm.mr = 3;
       xnn_params.qs8.gemm.nr = 4;
       xnn_params.qs8.gemm.log2_kr = 3;
@@ -1619,7 +1619,7 @@
       xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64);
       xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64);
       xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64);
-      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_sse2_params;
+      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse2_params;
       xnn_params.qs8.gemm.mr = 3;
       xnn_params.qs8.gemm.nr = 4;
       xnn_params.qs8.gemm.log2_kr = 3;
@@ -1627,53 +1627,53 @@
 
     if (cpuinfo_has_x86_avx512f() && cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl()) {
       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32;
-      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_sse2_params;
+      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse2_params;
       xnn_params.qs8.dwconv[0].channel_tile = 32;
       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32;
-      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_sse2_params;
+      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse2_params;
       xnn_params.qs8.dwconv[1].channel_tile = 32;
     } else if (cpuinfo_has_x86_xop()) {
       // XOP should be checked before AVX2: AMD Excavator supports both, but performs better with XOP microkernels
       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32;
-      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.dwconv[0].channel_tile = 16;
       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32;
-      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.dwconv[1].channel_tile = 16;
     } else if (cpuinfo_has_x86_avx2()) {
-      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32;
-      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_avx2_params;
+      xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32;
+      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx2_params;
       xnn_params.qs8.dwconv[0].channel_tile = 16;
-      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32;
-      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_avx2_params;
+      xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32;
+      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx2_params;
       xnn_params.qs8.dwconv[1].channel_tile = 16;
     } else if (cpuinfo_has_x86_avx()) {
       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32;
-      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.dwconv[0].channel_tile = 16;
       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32;
-      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.dwconv[1].channel_tile = 16;
     } else if (cpuinfo_has_x86_sse4_1()) {
       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16;
-      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.dwconv[0].channel_tile = 8;
       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16;
-      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_sse4_params;
+      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse4_params;
       xnn_params.qs8.dwconv[1].channel_tile = 8;
     } else if (cpuinfo_has_x86_ssse3()) {
       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16;
-      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_sse2_params;
+      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse2_params;
       xnn_params.qs8.dwconv[0].channel_tile = 8;
       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16;
-      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_sse2_params;
+      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse2_params;
       xnn_params.qs8.dwconv[1].channel_tile = 8;
     } else if (cpuinfo_has_x86_sse2()) {
       xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16;
-      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_sse2_params;
+      xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse2_params;
       xnn_params.qs8.dwconv[0].channel_tile = 8;
       xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16;
-      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_sse2_params;
+      xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_sse2_params;
       xnn_params.qs8.dwconv[1].channel_tile = 8;
     }
     xnn_params.qs8.dwconv[0].primary_tile = 9;
@@ -2306,17 +2306,17 @@
     xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64);
     xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64);
     xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64);
-    xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_wasmsimd_params;
+    xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params;
     xnn_params.qs8.gemm.mr = 3;
     xnn_params.qs8.gemm.nr = 4;
     xnn_params.qs8.gemm.log2_kr = 3;
 
     xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16;
-    xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_wasmsimd_params;
+    xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params;
     xnn_params.qs8.dwconv[0].channel_tile = 8;
     xnn_params.qs8.dwconv[0].primary_tile = 9;
     xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16;
-    xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_wasmsimd_params;
+    xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params;
     xnn_params.qs8.dwconv[1].channel_tile = 8;
     xnn_params.qs8.dwconv[1].primary_tile = 25;
 
@@ -2843,7 +2843,7 @@
       xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar);
       xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar);
       xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar);
-      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_scalar_params;
+      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_scalar_params;
       xnn_params.qs8.gemm.mr = 2;
       xnn_params.qs8.gemm.nr = 2;
     } else {
@@ -2851,17 +2851,17 @@
       xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar);
       xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar);
       xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar);
-      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_scalar_params;
+      xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_scalar_params;
       xnn_params.qs8.gemm.mr = 4;
       xnn_params.qs8.gemm.nr = 4;
     }
 
     xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar;
-    xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_scalar_params;
+    xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_scalar_params;
     xnn_params.qs8.dwconv[0].channel_tile = 2;
     xnn_params.qs8.dwconv[0].primary_tile = 9;
     xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar;
-    xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_scalar_params;
+    xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_scalar_params;
     xnn_params.qs8.dwconv[1].channel_tile = 2;
     xnn_params.qs8.dwconv[1].primary_tile = 25;
 
@@ -3229,16 +3229,16 @@
     xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar);
     xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_function) xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar);
     xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_function) xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar);
-    xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_scalar_params;
+    xnn_params.qs8.gemm.init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_scalar_params;
     xnn_params.qs8.gemm.mr = 3;
     xnn_params.qs8.gemm.nr = 4;
 
     xnn_params.qs8.dwconv[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar;
-    xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_scalar_params;
+    xnn_params.qs8.dwconv[0].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_scalar_params;
     xnn_params.qs8.dwconv[0].channel_tile = 2;
     xnn_params.qs8.dwconv[0].primary_tile = 9;
     xnn_params.qs8.dwconv[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_function) xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar;
-    xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_scalar_params;
+    xnn_params.qs8.dwconv[1].init.qs8 = xnn_init_qs8_conv_minmax_gemmlowp_scalar_params;
     xnn_params.qs8.dwconv[1].channel_tile = 2;
     xnn_params.qs8.dwconv[1].primary_tile = 25;
 
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul16.c
new file mode 100644
index 0000000..e8938a0
--- /dev/null
+++ b/src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul16.c
@@ -0,0 +1,667 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul16.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    const int8_t* i9 = input[9];
+    assert(i9 != NULL);
+    if XNN_UNPREDICTABLE(i9 != zero) {
+      i9 = (const int8_t*) ((uintptr_t) i9 + input_offset);
+    }
+    const int8_t* i10 = input[10];
+    assert(i10 != NULL);
+    if XNN_UNPREDICTABLE(i10 != zero) {
+      i10 = (const int8_t*) ((uintptr_t) i10 + input_offset);
+    }
+    const int8_t* i11 = input[11];
+    assert(i11 != NULL);
+    if XNN_UNPREDICTABLE(i11 != zero) {
+      i11 = (const int8_t*) ((uintptr_t) i11 + input_offset);
+    }
+    const int8_t* i12 = input[12];
+    assert(i12 != NULL);
+    if XNN_UNPREDICTABLE(i12 != zero) {
+      i12 = (const int8_t*) ((uintptr_t) i12 + input_offset);
+    }
+    const int8_t* i13 = input[13];
+    assert(i13 != NULL);
+    if XNN_UNPREDICTABLE(i13 != zero) {
+      i13 = (const int8_t*) ((uintptr_t) i13 + input_offset);
+    }
+    const int8_t* i14 = input[14];
+    assert(i14 != NULL);
+    if XNN_UNPREDICTABLE(i14 != zero) {
+      i14 = (const int8_t*) ((uintptr_t) i14 + input_offset);
+    }
+    const int8_t* i15 = input[15];
+    assert(i15 != NULL);
+    if XNN_UNPREDICTABLE(i15 != zero) {
+      i15 = (const int8_t*) ((uintptr_t) i15 + input_offset);
+    }
+    const int8_t* i16 = input[16];
+    assert(i16 != NULL);
+    if XNN_UNPREDICTABLE(i16 != zero) {
+      i16 = (const int8_t*) ((uintptr_t) i16 + input_offset);
+    }
+    const int8_t* i17 = input[17];
+    assert(i17 != NULL);
+    if XNN_UNPREDICTABLE(i17 != zero) {
+      i17 = (const int8_t*) ((uintptr_t) i17 + input_offset);
+    }
+    const int8_t* i18 = input[18];
+    assert(i18 != NULL);
+    if XNN_UNPREDICTABLE(i18 != zero) {
+      i18 = (const int8_t*) ((uintptr_t) i18 + input_offset);
+    }
+    const int8_t* i19 = input[19];
+    assert(i19 != NULL);
+    if XNN_UNPREDICTABLE(i19 != zero) {
+      i19 = (const int8_t*) ((uintptr_t) i19 + input_offset);
+    }
+    const int8_t* i20 = input[20];
+    assert(i20 != NULL);
+    if XNN_UNPREDICTABLE(i20 != zero) {
+      i20 = (const int8_t*) ((uintptr_t) i20 + input_offset);
+    }
+    const int8_t* i21 = input[21];
+    assert(i21 != NULL);
+    if XNN_UNPREDICTABLE(i21 != zero) {
+      i21 = (const int8_t*) ((uintptr_t) i21 + input_offset);
+    }
+    const int8_t* i22 = input[22];
+    assert(i22 != NULL);
+    if XNN_UNPREDICTABLE(i22 != zero) {
+      i22 = (const int8_t*) ((uintptr_t) i22 + input_offset);
+    }
+    const int8_t* i23 = input[23];
+    assert(i23 != NULL);
+    if XNN_UNPREDICTABLE(i23 != zero) {
+      i23 = (const int8_t*) ((uintptr_t) i23 + input_offset);
+    }
+    const int8_t* i24 = input[24];
+    assert(i24 != NULL);
+    if XNN_UNPREDICTABLE(i24 != zero) {
+      i24 = (const int8_t*) ((uintptr_t) i24 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 16; c -= 16) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+
+
+      const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0));
+      const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      i0 += 16;
+
+      const __m256i vprod0x0123456789ABCDEF =  _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF);
+      const __m128i vprod0x89ABCDEF = _mm256_extracti128_si256(vprod0x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod0x89ABCDEF));
+
+      const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1));
+      const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      i1 += 16;
+
+      const __m256i vprod1x0123456789ABCDEF =  _mm256_mullo_epi16(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF);
+      const __m128i vprod1x89ABCDEF = _mm256_extracti128_si256(vprod1x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod1x89ABCDEF));
+
+      const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2));
+      const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      i2 += 16;
+
+      const __m256i vprod2x0123456789ABCDEF =  _mm256_mullo_epi16(vi2x0123456789ABCDEF, vk2x0123456789ABCDEF);
+      const __m128i vprod2x89ABCDEF = _mm256_extracti128_si256(vprod2x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod2x89ABCDEF));
+
+      const __m256i vi3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i3));
+      const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      i3 += 16;
+
+      const __m256i vprod3x0123456789ABCDEF =  _mm256_mullo_epi16(vi3x0123456789ABCDEF, vk3x0123456789ABCDEF);
+      const __m128i vprod3x89ABCDEF = _mm256_extracti128_si256(vprod3x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod3x89ABCDEF));
+
+      const __m256i vi4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i4));
+      const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      i4 += 16;
+
+      const __m256i vprod4x0123456789ABCDEF =  _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF);
+      const __m128i vprod4x89ABCDEF = _mm256_extracti128_si256(vprod4x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod4x89ABCDEF));
+
+      const __m256i vi5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i5));
+      const __m256i vk5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      i5 += 16;
+
+      const __m256i vprod5x0123456789ABCDEF =  _mm256_mullo_epi16(vi5x0123456789ABCDEF, vk5x0123456789ABCDEF);
+      const __m128i vprod5x89ABCDEF = _mm256_extracti128_si256(vprod5x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod5x89ABCDEF));
+
+      const __m256i vi6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i6));
+      const __m256i vk6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      i6 += 16;
+
+      const __m256i vprod6x0123456789ABCDEF =  _mm256_mullo_epi16(vi6x0123456789ABCDEF, vk6x0123456789ABCDEF);
+      const __m128i vprod6x89ABCDEF = _mm256_extracti128_si256(vprod6x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod6x89ABCDEF));
+
+      const __m256i vi7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i7));
+      const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      i7 += 16;
+
+      const __m256i vprod7x0123456789ABCDEF =  _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF);
+      const __m128i vprod7x89ABCDEF = _mm256_extracti128_si256(vprod7x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod7x89ABCDEF));
+
+      const __m256i vi8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i8));
+      const __m256i vk8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      i8 += 16;
+
+      const __m256i vprod8x0123456789ABCDEF =  _mm256_mullo_epi16(vi8x0123456789ABCDEF, vk8x0123456789ABCDEF);
+      const __m128i vprod8x89ABCDEF = _mm256_extracti128_si256(vprod8x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
+
+      const __m256i vi9x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i9));
+      const __m256i vk9x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+      i9 += 16;
+
+      const __m256i vprod9x0123456789ABCDEF =  _mm256_mullo_epi16(vi9x0123456789ABCDEF, vk9x0123456789ABCDEF);
+      const __m128i vprod9x89ABCDEF = _mm256_extracti128_si256(vprod9x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod9x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod9x89ABCDEF));
+
+      const __m256i vi10x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i10));
+      const __m256i vk10x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+      i10 += 16;
+
+      const __m256i vprod10x0123456789ABCDEF =  _mm256_mullo_epi16(vi10x0123456789ABCDEF, vk10x0123456789ABCDEF);
+      const __m128i vprod10x89ABCDEF = _mm256_extracti128_si256(vprod10x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod10x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod10x89ABCDEF));
+
+      const __m256i vi11x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i11));
+      const __m256i vk11x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+      i11 += 16;
+
+      const __m256i vprod11x0123456789ABCDEF =  _mm256_mullo_epi16(vi11x0123456789ABCDEF, vk11x0123456789ABCDEF);
+      const __m128i vprod11x89ABCDEF = _mm256_extracti128_si256(vprod11x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod11x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod11x89ABCDEF));
+
+      const __m256i vi12x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i12));
+      const __m256i vk12x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+      i12 += 16;
+
+      const __m256i vprod12x0123456789ABCDEF =  _mm256_mullo_epi16(vi12x0123456789ABCDEF, vk12x0123456789ABCDEF);
+      const __m128i vprod12x89ABCDEF = _mm256_extracti128_si256(vprod12x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod12x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod12x89ABCDEF));
+
+      const __m256i vi13x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i13));
+      const __m256i vk13x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 208 * sizeof(int8_t))));
+      i13 += 16;
+
+      const __m256i vprod13x0123456789ABCDEF =  _mm256_mullo_epi16(vi13x0123456789ABCDEF, vk13x0123456789ABCDEF);
+      const __m128i vprod13x89ABCDEF = _mm256_extracti128_si256(vprod13x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod13x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod13x89ABCDEF));
+
+      const __m256i vi14x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i14));
+      const __m256i vk14x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 224 * sizeof(int8_t))));
+      i14 += 16;
+
+      const __m256i vprod14x0123456789ABCDEF =  _mm256_mullo_epi16(vi14x0123456789ABCDEF, vk14x0123456789ABCDEF);
+      const __m128i vprod14x89ABCDEF = _mm256_extracti128_si256(vprod14x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod14x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod14x89ABCDEF));
+
+      const __m256i vi15x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i15));
+      const __m256i vk15x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 240 * sizeof(int8_t))));
+      i15 += 16;
+
+      const __m256i vprod15x0123456789ABCDEF =  _mm256_mullo_epi16(vi15x0123456789ABCDEF, vk15x0123456789ABCDEF);
+      const __m128i vprod15x89ABCDEF = _mm256_extracti128_si256(vprod15x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod15x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod15x89ABCDEF));
+
+      const __m256i vi16x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i16));
+      const __m256i vk16x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 256 * sizeof(int8_t))));
+      i16 += 16;
+
+      const __m256i vprod16x0123456789ABCDEF =  _mm256_mullo_epi16(vi16x0123456789ABCDEF, vk16x0123456789ABCDEF);
+      const __m128i vprod16x89ABCDEF = _mm256_extracti128_si256(vprod16x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod16x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod16x89ABCDEF));
+
+      const __m256i vi17x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i17));
+      const __m256i vk17x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 272 * sizeof(int8_t))));
+      i17 += 16;
+
+      const __m256i vprod17x0123456789ABCDEF =  _mm256_mullo_epi16(vi17x0123456789ABCDEF, vk17x0123456789ABCDEF);
+      const __m128i vprod17x89ABCDEF = _mm256_extracti128_si256(vprod17x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod17x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod17x89ABCDEF));
+
+      const __m256i vi18x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i18));
+      const __m256i vk18x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 288 * sizeof(int8_t))));
+      i18 += 16;
+
+      const __m256i vprod18x0123456789ABCDEF =  _mm256_mullo_epi16(vi18x0123456789ABCDEF, vk18x0123456789ABCDEF);
+      const __m128i vprod18x89ABCDEF = _mm256_extracti128_si256(vprod18x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod18x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod18x89ABCDEF));
+
+      const __m256i vi19x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i19));
+      const __m256i vk19x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 304 * sizeof(int8_t))));
+      i19 += 16;
+
+      const __m256i vprod19x0123456789ABCDEF =  _mm256_mullo_epi16(vi19x0123456789ABCDEF, vk19x0123456789ABCDEF);
+      const __m128i vprod19x89ABCDEF = _mm256_extracti128_si256(vprod19x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod19x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod19x89ABCDEF));
+
+      const __m256i vi20x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i20));
+      const __m256i vk20x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 320 * sizeof(int8_t))));
+      i20 += 16;
+
+      const __m256i vprod20x0123456789ABCDEF =  _mm256_mullo_epi16(vi20x0123456789ABCDEF, vk20x0123456789ABCDEF);
+      const __m128i vprod20x89ABCDEF = _mm256_extracti128_si256(vprod20x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod20x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod20x89ABCDEF));
+
+      const __m256i vi21x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i21));
+      const __m256i vk21x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 336 * sizeof(int8_t))));
+      i21 += 16;
+
+      const __m256i vprod21x0123456789ABCDEF =  _mm256_mullo_epi16(vi21x0123456789ABCDEF, vk21x0123456789ABCDEF);
+      const __m128i vprod21x89ABCDEF = _mm256_extracti128_si256(vprod21x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod21x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod21x89ABCDEF));
+
+      const __m256i vi22x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i22));
+      const __m256i vk22x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 352 * sizeof(int8_t))));
+      i22 += 16;
+
+      const __m256i vprod22x0123456789ABCDEF =  _mm256_mullo_epi16(vi22x0123456789ABCDEF, vk22x0123456789ABCDEF);
+      const __m128i vprod22x89ABCDEF = _mm256_extracti128_si256(vprod22x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod22x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod22x89ABCDEF));
+
+      const __m256i vi23x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i23));
+      const __m256i vk23x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 368 * sizeof(int8_t))));
+      i23 += 16;
+
+      const __m256i vprod23x0123456789ABCDEF =  _mm256_mullo_epi16(vi23x0123456789ABCDEF, vk23x0123456789ABCDEF);
+      const __m128i vprod23x89ABCDEF = _mm256_extracti128_si256(vprod23x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod23x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod23x89ABCDEF));
+
+      const __m256i vi24x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i24));
+      const __m256i vk24x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 384 * sizeof(int8_t))));
+      i24 += 16;
+
+      const __m256i vprod24x0123456789ABCDEF =  _mm256_mullo_epi16(vi24x0123456789ABCDEF, vk24x0123456789ABCDEF);
+      const __m128i vprod24x89ABCDEF = _mm256_extracti128_si256(vprod24x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod24x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod24x89ABCDEF));
+
+      w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      const __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      output += 16;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+        __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+
+
+        const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0));
+        const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+
+        const __m256i vprod0x0123456789ABCDEF = _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF);
+        const __m128i vprod0x89ABCDEF = _mm256_extracti128_si256(vprod0x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod0x89ABCDEF));
+
+        const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1));
+        const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+
+        const __m256i vprod1x0123456789ABCDEF = _mm256_mullo_epi16(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF);
+        const __m128i vprod1x89ABCDEF = _mm256_extracti128_si256(vprod1x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod1x89ABCDEF));
+
+        const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2));
+        const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+
+        const __m256i vprod2x0123456789ABCDEF = _mm256_mullo_epi16(vi2x0123456789ABCDEF, vk2x0123456789ABCDEF);
+        const __m128i vprod2x89ABCDEF = _mm256_extracti128_si256(vprod2x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod2x89ABCDEF));
+
+        const __m256i vi3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i3));
+        const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+
+        const __m256i vprod3x0123456789ABCDEF = _mm256_mullo_epi16(vi3x0123456789ABCDEF, vk3x0123456789ABCDEF);
+        const __m128i vprod3x89ABCDEF = _mm256_extracti128_si256(vprod3x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod3x89ABCDEF));
+
+        const __m256i vi4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i4));
+        const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+
+        const __m256i vprod4x0123456789ABCDEF = _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF);
+        const __m128i vprod4x89ABCDEF = _mm256_extracti128_si256(vprod4x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod4x89ABCDEF));
+
+        const __m256i vi5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i5));
+        const __m256i vk5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+
+        const __m256i vprod5x0123456789ABCDEF = _mm256_mullo_epi16(vi5x0123456789ABCDEF, vk5x0123456789ABCDEF);
+        const __m128i vprod5x89ABCDEF = _mm256_extracti128_si256(vprod5x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod5x89ABCDEF));
+
+        const __m256i vi6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i6));
+        const __m256i vk6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+
+        const __m256i vprod6x0123456789ABCDEF = _mm256_mullo_epi16(vi6x0123456789ABCDEF, vk6x0123456789ABCDEF);
+        const __m128i vprod6x89ABCDEF = _mm256_extracti128_si256(vprod6x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod6x89ABCDEF));
+
+        const __m256i vi7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i7));
+        const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+
+        const __m256i vprod7x0123456789ABCDEF = _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF);
+        const __m128i vprod7x89ABCDEF = _mm256_extracti128_si256(vprod7x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod7x89ABCDEF));
+
+        const __m256i vi8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i8));
+        const __m256i vk8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+
+        const __m256i vprod8x0123456789ABCDEF = _mm256_mullo_epi16(vi8x0123456789ABCDEF, vk8x0123456789ABCDEF);
+        const __m128i vprod8x89ABCDEF = _mm256_extracti128_si256(vprod8x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
+
+        const __m256i vi9x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i9));
+        const __m256i vk9x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+
+        const __m256i vprod9x0123456789ABCDEF = _mm256_mullo_epi16(vi9x0123456789ABCDEF, vk9x0123456789ABCDEF);
+        const __m128i vprod9x89ABCDEF = _mm256_extracti128_si256(vprod9x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod9x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod9x89ABCDEF));
+
+        const __m256i vi10x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i10));
+        const __m256i vk10x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+
+        const __m256i vprod10x0123456789ABCDEF = _mm256_mullo_epi16(vi10x0123456789ABCDEF, vk10x0123456789ABCDEF);
+        const __m128i vprod10x89ABCDEF = _mm256_extracti128_si256(vprod10x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod10x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod10x89ABCDEF));
+
+        const __m256i vi11x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i11));
+        const __m256i vk11x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+
+        const __m256i vprod11x0123456789ABCDEF = _mm256_mullo_epi16(vi11x0123456789ABCDEF, vk11x0123456789ABCDEF);
+        const __m128i vprod11x89ABCDEF = _mm256_extracti128_si256(vprod11x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod11x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod11x89ABCDEF));
+
+        const __m256i vi12x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i12));
+        const __m256i vk12x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+
+        const __m256i vprod12x0123456789ABCDEF = _mm256_mullo_epi16(vi12x0123456789ABCDEF, vk12x0123456789ABCDEF);
+        const __m128i vprod12x89ABCDEF = _mm256_extracti128_si256(vprod12x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod12x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod12x89ABCDEF));
+
+        const __m256i vi13x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i13));
+        const __m256i vk13x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 208 * sizeof(int8_t))));
+
+        const __m256i vprod13x0123456789ABCDEF = _mm256_mullo_epi16(vi13x0123456789ABCDEF, vk13x0123456789ABCDEF);
+        const __m128i vprod13x89ABCDEF = _mm256_extracti128_si256(vprod13x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod13x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod13x89ABCDEF));
+
+        const __m256i vi14x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i14));
+        const __m256i vk14x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 224 * sizeof(int8_t))));
+
+        const __m256i vprod14x0123456789ABCDEF = _mm256_mullo_epi16(vi14x0123456789ABCDEF, vk14x0123456789ABCDEF);
+        const __m128i vprod14x89ABCDEF = _mm256_extracti128_si256(vprod14x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod14x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod14x89ABCDEF));
+
+        const __m256i vi15x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i15));
+        const __m256i vk15x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 240 * sizeof(int8_t))));
+
+        const __m256i vprod15x0123456789ABCDEF = _mm256_mullo_epi16(vi15x0123456789ABCDEF, vk15x0123456789ABCDEF);
+        const __m128i vprod15x89ABCDEF = _mm256_extracti128_si256(vprod15x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod15x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod15x89ABCDEF));
+
+        const __m256i vi16x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i16));
+        const __m256i vk16x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 256 * sizeof(int8_t))));
+
+        const __m256i vprod16x0123456789ABCDEF = _mm256_mullo_epi16(vi16x0123456789ABCDEF, vk16x0123456789ABCDEF);
+        const __m128i vprod16x89ABCDEF = _mm256_extracti128_si256(vprod16x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod16x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod16x89ABCDEF));
+
+        const __m256i vi17x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i17));
+        const __m256i vk17x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 272 * sizeof(int8_t))));
+
+        const __m256i vprod17x0123456789ABCDEF = _mm256_mullo_epi16(vi17x0123456789ABCDEF, vk17x0123456789ABCDEF);
+        const __m128i vprod17x89ABCDEF = _mm256_extracti128_si256(vprod17x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod17x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod17x89ABCDEF));
+
+        const __m256i vi18x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i18));
+        const __m256i vk18x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 288 * sizeof(int8_t))));
+
+        const __m256i vprod18x0123456789ABCDEF = _mm256_mullo_epi16(vi18x0123456789ABCDEF, vk18x0123456789ABCDEF);
+        const __m128i vprod18x89ABCDEF = _mm256_extracti128_si256(vprod18x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod18x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod18x89ABCDEF));
+
+        const __m256i vi19x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i19));
+        const __m256i vk19x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 304 * sizeof(int8_t))));
+
+        const __m256i vprod19x0123456789ABCDEF = _mm256_mullo_epi16(vi19x0123456789ABCDEF, vk19x0123456789ABCDEF);
+        const __m128i vprod19x89ABCDEF = _mm256_extracti128_si256(vprod19x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod19x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod19x89ABCDEF));
+
+        const __m256i vi20x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i20));
+        const __m256i vk20x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 320 * sizeof(int8_t))));
+
+        const __m256i vprod20x0123456789ABCDEF = _mm256_mullo_epi16(vi20x0123456789ABCDEF, vk20x0123456789ABCDEF);
+        const __m128i vprod20x89ABCDEF = _mm256_extracti128_si256(vprod20x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod20x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod20x89ABCDEF));
+
+        const __m256i vi21x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i21));
+        const __m256i vk21x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 336 * sizeof(int8_t))));
+
+        const __m256i vprod21x0123456789ABCDEF = _mm256_mullo_epi16(vi21x0123456789ABCDEF, vk21x0123456789ABCDEF);
+        const __m128i vprod21x89ABCDEF = _mm256_extracti128_si256(vprod21x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod21x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod21x89ABCDEF));
+
+        const __m256i vi22x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i22));
+        const __m256i vk22x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 352 * sizeof(int8_t))));
+
+        const __m256i vprod22x0123456789ABCDEF = _mm256_mullo_epi16(vi22x0123456789ABCDEF, vk22x0123456789ABCDEF);
+        const __m128i vprod22x89ABCDEF = _mm256_extracti128_si256(vprod22x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod22x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod22x89ABCDEF));
+
+        const __m256i vi23x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i23));
+        const __m256i vk23x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 368 * sizeof(int8_t))));
+
+        const __m256i vprod23x0123456789ABCDEF = _mm256_mullo_epi16(vi23x0123456789ABCDEF, vk23x0123456789ABCDEF);
+        const __m128i vprod23x89ABCDEF = _mm256_extracti128_si256(vprod23x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod23x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod23x89ABCDEF));
+
+        const __m256i vi24x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i24));
+        const __m256i vk24x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 384 * sizeof(int8_t))));
+
+        const __m256i vprod24x0123456789ABCDEF = _mm256_mullo_epi16(vi24x0123456789ABCDEF, vk24x0123456789ABCDEF);
+        const __m128i vprod24x89ABCDEF = _mm256_extracti128_si256(vprod24x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod24x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod24x89ABCDEF));
+
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+
+        const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+        vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+        vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+        __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc89ABCDEF), _mm256_extracti128_si256(vacc89ABCDEF, 1)), voutput_zero_point);
+
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+
+        __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
+        vout0123456789ABCDEF = _mm_min_epi8(_mm_max_epi8(vout0123456789ABCDEF, voutput_min), voutput_max);
+
+        if (c & 8) {
+          _mm_storel_epi64((__m128i*) output, vout0123456789ABCDEF);
+          vout0123456789ABCDEF = _mm_unpackhi_epi64(vout0123456789ABCDEF, vout0123456789ABCDEF);
+          output += 8;
+        }
+        if (c & 4) {
+          *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456789ABCDEF);
+          vout0123456789ABCDEF = _mm_srli_epi64(vout0123456789ABCDEF, 32);
+          output += 4;
+        }
+        if (c & 2) {
+          *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456789ABCDEF, 0);
+          vout0123456789ABCDEF = _mm_srli_epi32(vout0123456789ABCDEF, 16);
+          output += 2;
+        }
+        if (c & 1) {
+          *output = (int8_t) _mm_extract_epi8(vout0123456789ABCDEF, 0);
+          output += 1;
+        }
+      }
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul32.c
new file mode 100644
index 0000000..4cf1156
--- /dev/null
+++ b/src/qs8-dwconv/gen/up16x25-minmax-fp32-avx2-mul32.c
@@ -0,0 +1,615 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul32.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    const int8_t* i9 = input[9];
+    assert(i9 != NULL);
+    if XNN_UNPREDICTABLE(i9 != zero) {
+      i9 = (const int8_t*) ((uintptr_t) i9 + input_offset);
+    }
+    const int8_t* i10 = input[10];
+    assert(i10 != NULL);
+    if XNN_UNPREDICTABLE(i10 != zero) {
+      i10 = (const int8_t*) ((uintptr_t) i10 + input_offset);
+    }
+    const int8_t* i11 = input[11];
+    assert(i11 != NULL);
+    if XNN_UNPREDICTABLE(i11 != zero) {
+      i11 = (const int8_t*) ((uintptr_t) i11 + input_offset);
+    }
+    const int8_t* i12 = input[12];
+    assert(i12 != NULL);
+    if XNN_UNPREDICTABLE(i12 != zero) {
+      i12 = (const int8_t*) ((uintptr_t) i12 + input_offset);
+    }
+    const int8_t* i13 = input[13];
+    assert(i13 != NULL);
+    if XNN_UNPREDICTABLE(i13 != zero) {
+      i13 = (const int8_t*) ((uintptr_t) i13 + input_offset);
+    }
+    const int8_t* i14 = input[14];
+    assert(i14 != NULL);
+    if XNN_UNPREDICTABLE(i14 != zero) {
+      i14 = (const int8_t*) ((uintptr_t) i14 + input_offset);
+    }
+    const int8_t* i15 = input[15];
+    assert(i15 != NULL);
+    if XNN_UNPREDICTABLE(i15 != zero) {
+      i15 = (const int8_t*) ((uintptr_t) i15 + input_offset);
+    }
+    const int8_t* i16 = input[16];
+    assert(i16 != NULL);
+    if XNN_UNPREDICTABLE(i16 != zero) {
+      i16 = (const int8_t*) ((uintptr_t) i16 + input_offset);
+    }
+    const int8_t* i17 = input[17];
+    assert(i17 != NULL);
+    if XNN_UNPREDICTABLE(i17 != zero) {
+      i17 = (const int8_t*) ((uintptr_t) i17 + input_offset);
+    }
+    const int8_t* i18 = input[18];
+    assert(i18 != NULL);
+    if XNN_UNPREDICTABLE(i18 != zero) {
+      i18 = (const int8_t*) ((uintptr_t) i18 + input_offset);
+    }
+    const int8_t* i19 = input[19];
+    assert(i19 != NULL);
+    if XNN_UNPREDICTABLE(i19 != zero) {
+      i19 = (const int8_t*) ((uintptr_t) i19 + input_offset);
+    }
+    const int8_t* i20 = input[20];
+    assert(i20 != NULL);
+    if XNN_UNPREDICTABLE(i20 != zero) {
+      i20 = (const int8_t*) ((uintptr_t) i20 + input_offset);
+    }
+    const int8_t* i21 = input[21];
+    assert(i21 != NULL);
+    if XNN_UNPREDICTABLE(i21 != zero) {
+      i21 = (const int8_t*) ((uintptr_t) i21 + input_offset);
+    }
+    const int8_t* i22 = input[22];
+    assert(i22 != NULL);
+    if XNN_UNPREDICTABLE(i22 != zero) {
+      i22 = (const int8_t*) ((uintptr_t) i22 + input_offset);
+    }
+    const int8_t* i23 = input[23];
+    assert(i23 != NULL);
+    if XNN_UNPREDICTABLE(i23 != zero) {
+      i23 = (const int8_t*) ((uintptr_t) i23 + input_offset);
+    }
+    const int8_t* i24 = input[24];
+    assert(i24 != NULL);
+    if XNN_UNPREDICTABLE(i24 != zero) {
+      i24 = (const int8_t*) ((uintptr_t) i24 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 16; c -= 16) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+
+
+      const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+      const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      const __m256i vi0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 8)));
+      const __m256i vk0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+      i0 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi0x89ABCDEF, vk0x89ABCDEF));
+
+      const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+      const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      const __m256i vi1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 8)));
+      const __m256i vk1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+      i1 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi1x89ABCDEF, vk1x89ABCDEF));
+
+      const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+      const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      const __m256i vi2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 8)));
+      const __m256i vk2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+      i2 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi2x89ABCDEF, vk2x89ABCDEF));
+
+      const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+      const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      const __m256i vi3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 8)));
+      const __m256i vk3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+      i3 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi3x89ABCDEF, vk3x89ABCDEF));
+
+      const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+      const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      const __m256i vi4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 8)));
+      const __m256i vk4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 72 * sizeof(int8_t))));
+      i4 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi4x89ABCDEF, vk4x89ABCDEF));
+
+      const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+      const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      const __m256i vi5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 8)));
+      const __m256i vk5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 88 * sizeof(int8_t))));
+      i5 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi5x89ABCDEF, vk5x89ABCDEF));
+
+      const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+      const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      const __m256i vi6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 8)));
+      const __m256i vk6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 104 * sizeof(int8_t))));
+      i6 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi6x89ABCDEF, vk6x89ABCDEF));
+
+      const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+      const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      const __m256i vi7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 8)));
+      const __m256i vk7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 120 * sizeof(int8_t))));
+      i7 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi7x89ABCDEF, vk7x89ABCDEF));
+
+      const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+      const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      const __m256i vi8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 8)));
+      const __m256i vk8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 136 * sizeof(int8_t))));
+      i8 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi8x89ABCDEF, vk8x89ABCDEF));
+
+      const __m256i vi9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i9));
+      const __m256i vk9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+      const __m256i vi9x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i9 + 8)));
+      const __m256i vk9x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 152 * sizeof(int8_t))));
+      i9 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi9x01234567, vk9x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi9x89ABCDEF, vk9x89ABCDEF));
+
+      const __m256i vi10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i10));
+      const __m256i vk10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+      const __m256i vi10x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i10 + 8)));
+      const __m256i vk10x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 168 * sizeof(int8_t))));
+      i10 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi10x01234567, vk10x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi10x89ABCDEF, vk10x89ABCDEF));
+
+      const __m256i vi11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i11));
+      const __m256i vk11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+      const __m256i vi11x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i11 + 8)));
+      const __m256i vk11x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 184 * sizeof(int8_t))));
+      i11 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi11x01234567, vk11x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi11x89ABCDEF, vk11x89ABCDEF));
+
+      const __m256i vi12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i12));
+      const __m256i vk12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+      const __m256i vi12x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i12 + 8)));
+      const __m256i vk12x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 200 * sizeof(int8_t))));
+      i12 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi12x01234567, vk12x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi12x89ABCDEF, vk12x89ABCDEF));
+
+      const __m256i vi13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i13));
+      const __m256i vk13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 208 * sizeof(int8_t))));
+      const __m256i vi13x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i13 + 8)));
+      const __m256i vk13x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 216 * sizeof(int8_t))));
+      i13 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi13x01234567, vk13x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi13x89ABCDEF, vk13x89ABCDEF));
+
+      const __m256i vi14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i14));
+      const __m256i vk14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 224 * sizeof(int8_t))));
+      const __m256i vi14x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i14 + 8)));
+      const __m256i vk14x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 232 * sizeof(int8_t))));
+      i14 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi14x01234567, vk14x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi14x89ABCDEF, vk14x89ABCDEF));
+
+      const __m256i vi15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i15));
+      const __m256i vk15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 240 * sizeof(int8_t))));
+      const __m256i vi15x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i15 + 8)));
+      const __m256i vk15x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 248 * sizeof(int8_t))));
+      i15 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi15x01234567, vk15x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi15x89ABCDEF, vk15x89ABCDEF));
+
+      const __m256i vi16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i16));
+      const __m256i vk16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 256 * sizeof(int8_t))));
+      const __m256i vi16x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i16 + 8)));
+      const __m256i vk16x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 264 * sizeof(int8_t))));
+      i16 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi16x01234567, vk16x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi16x89ABCDEF, vk16x89ABCDEF));
+
+      const __m256i vi17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i17));
+      const __m256i vk17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 272 * sizeof(int8_t))));
+      const __m256i vi17x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i17 + 8)));
+      const __m256i vk17x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 280 * sizeof(int8_t))));
+      i17 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi17x01234567, vk17x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi17x89ABCDEF, vk17x89ABCDEF));
+
+      const __m256i vi18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i18));
+      const __m256i vk18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 288 * sizeof(int8_t))));
+      const __m256i vi18x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i18 + 8)));
+      const __m256i vk18x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 296 * sizeof(int8_t))));
+      i18 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi18x01234567, vk18x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi18x89ABCDEF, vk18x89ABCDEF));
+
+      const __m256i vi19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i19));
+      const __m256i vk19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 304 * sizeof(int8_t))));
+      const __m256i vi19x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i19 + 8)));
+      const __m256i vk19x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 312 * sizeof(int8_t))));
+      i19 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi19x01234567, vk19x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi19x89ABCDEF, vk19x89ABCDEF));
+
+      const __m256i vi20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i20));
+      const __m256i vk20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 320 * sizeof(int8_t))));
+      const __m256i vi20x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i20 + 8)));
+      const __m256i vk20x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 328 * sizeof(int8_t))));
+      i20 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi20x01234567, vk20x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi20x89ABCDEF, vk20x89ABCDEF));
+
+      const __m256i vi21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i21));
+      const __m256i vk21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 336 * sizeof(int8_t))));
+      const __m256i vi21x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i21 + 8)));
+      const __m256i vk21x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 344 * sizeof(int8_t))));
+      i21 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi21x01234567, vk21x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi21x89ABCDEF, vk21x89ABCDEF));
+
+      const __m256i vi22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i22));
+      const __m256i vk22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 352 * sizeof(int8_t))));
+      const __m256i vi22x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i22 + 8)));
+      const __m256i vk22x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 360 * sizeof(int8_t))));
+      i22 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi22x01234567, vk22x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi22x89ABCDEF, vk22x89ABCDEF));
+
+      const __m256i vi23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i23));
+      const __m256i vk23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 368 * sizeof(int8_t))));
+      const __m256i vi23x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i23 + 8)));
+      const __m256i vk23x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 376 * sizeof(int8_t))));
+      i23 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi23x01234567, vk23x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi23x89ABCDEF, vk23x89ABCDEF));
+
+      const __m256i vi24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i24));
+      const __m256i vk24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 384 * sizeof(int8_t))));
+      const __m256i vi24x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i24 + 8)));
+      const __m256i vk24x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 392 * sizeof(int8_t))));
+      i24 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi24x01234567, vk24x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi24x89ABCDEF, vk24x89ABCDEF));
+
+      w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
+
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      output += 16;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      const int8_t* k = (const int8_t*) ((uintptr_t) w + 16 * sizeof(int32_t));
+      do {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+        const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+        const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) k));
+        i0 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+        const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+        const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 16)));
+        i1 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+        const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+        const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 32)));
+        i2 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+        const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+        const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 48)));
+        i3 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+        const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+        const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 64)));
+        i4 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+        const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+        const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 80)));
+        i5 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+        const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+        const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 96)));
+        i6 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+        const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+        const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 112)));
+        i7 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+        const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+        const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 128)));
+        i8 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+        const __m256i vi9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i9));
+        const __m256i vk9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 144)));
+        i9 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi9x01234567, vk9x01234567));
+
+        const __m256i vi10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i10));
+        const __m256i vk10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 160)));
+        i10 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi10x01234567, vk10x01234567));
+
+        const __m256i vi11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i11));
+        const __m256i vk11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 176)));
+        i11 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi11x01234567, vk11x01234567));
+
+        const __m256i vi12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i12));
+        const __m256i vk12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 192)));
+        i12 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi12x01234567, vk12x01234567));
+
+        const __m256i vi13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i13));
+        const __m256i vk13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 208)));
+        i13 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi13x01234567, vk13x01234567));
+
+        const __m256i vi14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i14));
+        const __m256i vk14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 224)));
+        i14 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi14x01234567, vk14x01234567));
+
+        const __m256i vi15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i15));
+        const __m256i vk15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 240)));
+        i15 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi15x01234567, vk15x01234567));
+
+        const __m256i vi16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i16));
+        const __m256i vk16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 256)));
+        i16 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi16x01234567, vk16x01234567));
+
+        const __m256i vi17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i17));
+        const __m256i vk17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 272)));
+        i17 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi17x01234567, vk17x01234567));
+
+        const __m256i vi18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i18));
+        const __m256i vk18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 288)));
+        i18 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi18x01234567, vk18x01234567));
+
+        const __m256i vi19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i19));
+        const __m256i vk19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 304)));
+        i19 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi19x01234567, vk19x01234567));
+
+        const __m256i vi20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i20));
+        const __m256i vk20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 320)));
+        i20 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi20x01234567, vk20x01234567));
+
+        const __m256i vi21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i21));
+        const __m256i vk21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 336)));
+        i21 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi21x01234567, vk21x01234567));
+
+        const __m256i vi22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i22));
+        const __m256i vk22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 352)));
+        i22 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi22x01234567, vk22x01234567));
+
+        const __m256i vi23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i23));
+        const __m256i vk23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 368)));
+        i23 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi23x01234567, vk23x01234567));
+
+        const __m256i vi24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i24));
+        const __m256i vk24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 384)));
+        i24 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi24x01234567, vk24x01234567));
+
+        w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+        k += 8;
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, _mm256_load_ps(params->fp32_avx2.scale));
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+        __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+
+        if XNN_LIKELY(c >= 8) {
+          _mm_storel_epi64((__m128i*) output, vout0123456701234567);
+          output += 8;
+          c -= 8;
+        } else {
+          if (c & 4) {
+            *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
+            vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32);
+            output += 4;
+          }
+          if (c & 2) {
+            *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0);
+            vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16);
+            output += 2;
+          }
+          if (c & 1) {
+            *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0);
+            output += 1;
+          }
+          c = 0;
+        }
+      } while (c != 0);
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul16.c
index b7e5dee..0319710 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul16.c
@@ -693,8 +693,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -723,7 +723,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -733,8 +733,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -744,17 +744,17 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
 
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
 
       _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
@@ -1095,8 +1095,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -1115,27 +1115,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 8) {
           _mm_storel_epi64((__m128i*) output, vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul32.c
index 7014a60..559b173 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx-mul32.c
@@ -544,8 +544,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -574,7 +574,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -584,8 +584,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -595,12 +595,12 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -767,8 +767,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -780,21 +780,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c
index b849733..ffd862a 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul16.c
@@ -390,9 +390,8 @@
       vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod24x89ABCDEF));
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
-
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -410,26 +409,26 @@
       const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
       const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
         _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89ABCDEF, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       const __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
 
@@ -643,8 +642,8 @@
         vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod24x89ABCDEF));
 
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
         const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -662,25 +661,25 @@
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
         const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
         const __m256i vrem89ABCDEF =
           _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
         vacc89ABCDEF =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89ABCDEF, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
         __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc89ABCDEF), _mm256_extracti128_si256(vacc89ABCDEF, 1)), voutput_zero_point);
 
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
 
         __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
         vout0123456789ABCDEF = _mm_min_epi8(_mm_max_epi8(vout0123456789ABCDEF, voutput_min), voutput_max);
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c
index 810d524..9db5041 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx2-mul32.c
@@ -391,8 +391,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -410,26 +410,26 @@
       const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
       const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
         _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89ABCDEF, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
 
@@ -595,8 +595,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -608,21 +608,21 @@
 
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
         vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
         vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx512skx-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx512skx-mul32.c
index ef0bc78..891b6ca 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx512skx-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-avx512skx-mul32.c
@@ -31,14 +31,14 @@
   assert(output_width != 0);
 
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
-  const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_zero_point));
-  const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_min));
-  const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
+  const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point));
+  const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+  const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
   do {
     const int8_t* i0 = input[0];
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-neon-mul16.c
index 2b6e60e..02e88f5 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-neon-mul16.c
@@ -29,12 +29,12 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
-  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+  const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
+  const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
-  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
-  const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-  const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
+  const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+  const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse2-mul16.c
index a14750c..cf1ace0 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse2-mul16.c
@@ -693,8 +693,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -760,7 +760,7 @@
       const __m128i vq31prod89AB = _mm_shuffle_epi32(vq31prod8A9B, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prodCDEF = _mm_shuffle_epi32(vq31prodCEDF, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -770,8 +770,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -781,15 +781,15 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
       vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
       vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max);
 
@@ -1134,8 +1134,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -1174,24 +1174,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul16.c
index cff6ab9..2de930e 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul16.c
@@ -693,8 +693,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -723,7 +723,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -733,8 +733,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -744,17 +744,17 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
 
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
 
       _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
@@ -1095,8 +1095,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -1115,27 +1115,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 8) {
           _mm_storel_epi64((__m128i*) output, vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul32.c
index 1b5ca78..46063dc 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-sse41-mul32.c
@@ -544,8 +544,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -574,7 +574,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -584,8 +584,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -595,12 +595,12 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -767,8 +767,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -780,21 +780,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-ssse3-mul16.c
index 11496f8..74e7d01 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-ssse3-mul16.c
@@ -693,8 +693,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -760,7 +760,7 @@
       const __m128i vq31prod89AB = _mm_shuffle_epi32(vq31prod8A9B, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prodCDEF = _mm_shuffle_epi32(vq31prodCEDF, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -770,8 +770,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -781,15 +781,15 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
       vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
       vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max);
 
@@ -1134,8 +1134,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -1174,24 +1174,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-wasmsimd-mul16.c
index fc47ece..39c5ac8 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-wasmsimd-mul16.c
@@ -532,8 +532,8 @@
       const v128_t vaccCD = wasm_v32x4_shuffle(vaccCDEF, vsignCDEF, 0, 4, 1, 5);
       const v128_t vaccEF = wasm_v32x4_shuffle(vaccCDEF, vsignCDEF, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -548,25 +548,25 @@
       const v128_t vq31prod89AB = wasm_v32x4_shuffle(vprod89, vprodAB, 1, 3, 5, 7);
       const v128_t vq31prodCDEF = wasm_v32x4_shuffle(vprodCD, vprodEF, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
       const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vq31prod89AB, vremainder_mask), wasm_i32x4_shr(vq31prod89AB, 31));
       const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vq31prodCDEF, vremainder_mask), wasm_i32x4_shr(vq31prodCDEF, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
       vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod89AB, vshift), wasm_i32x4_gt(vrem89AB, vthreshold));
       vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vq31prodCDEF, vshift), wasm_i32x4_gt(vremCDEF, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
       v128_t vout89ABCDEF = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456789ABCDEF = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout89ABCDEF), voutput_min), voutput_max);
 
       wasm_v128_store(output, vout0123456789ABCDEF);
@@ -815,8 +815,8 @@
       const v128_t vacc45 = wasm_v32x4_shuffle(vacc4567, vsign4567, 0, 4, 1, 5);
       const v128_t vacc67 = wasm_v32x4_shuffle(vacc4567, vsign4567, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -825,20 +825,20 @@
       const v128_t vq31prod0123 = wasm_v32x4_shuffle(vprod01, vprod23, 1, 3, 5, 7);
       const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);
 
       if XNN_LIKELY(c >= 8) {
diff --git a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-xop-mul32.c
index c81791e..0f0e92d 100644
--- a/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up16x25-minmax-gemmlowp-xop-mul32.c
@@ -549,8 +549,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 400 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -579,7 +579,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -589,8 +589,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -600,12 +600,12 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -772,8 +772,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -785,21 +785,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul16.c
new file mode 100644
index 0000000..9a002b0
--- /dev/null
+++ b/src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul16.c
@@ -0,0 +1,315 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul16.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 16; c -= 16) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+
+
+      const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0));
+      const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      i0 += 16;
+
+      const __m256i vprod0x0123456789ABCDEF =  _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF);
+      const __m128i vprod0x89ABCDEF = _mm256_extracti128_si256(vprod0x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod0x89ABCDEF));
+
+      const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1));
+      const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      i1 += 16;
+
+      const __m256i vprod1x0123456789ABCDEF =  _mm256_mullo_epi16(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF);
+      const __m128i vprod1x89ABCDEF = _mm256_extracti128_si256(vprod1x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod1x89ABCDEF));
+
+      const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2));
+      const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      i2 += 16;
+
+      const __m256i vprod2x0123456789ABCDEF =  _mm256_mullo_epi16(vi2x0123456789ABCDEF, vk2x0123456789ABCDEF);
+      const __m128i vprod2x89ABCDEF = _mm256_extracti128_si256(vprod2x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod2x89ABCDEF));
+
+      const __m256i vi3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i3));
+      const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      i3 += 16;
+
+      const __m256i vprod3x0123456789ABCDEF =  _mm256_mullo_epi16(vi3x0123456789ABCDEF, vk3x0123456789ABCDEF);
+      const __m128i vprod3x89ABCDEF = _mm256_extracti128_si256(vprod3x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod3x89ABCDEF));
+
+      const __m256i vi4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i4));
+      const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      i4 += 16;
+
+      const __m256i vprod4x0123456789ABCDEF =  _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF);
+      const __m128i vprod4x89ABCDEF = _mm256_extracti128_si256(vprod4x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod4x89ABCDEF));
+
+      const __m256i vi5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i5));
+      const __m256i vk5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      i5 += 16;
+
+      const __m256i vprod5x0123456789ABCDEF =  _mm256_mullo_epi16(vi5x0123456789ABCDEF, vk5x0123456789ABCDEF);
+      const __m128i vprod5x89ABCDEF = _mm256_extracti128_si256(vprod5x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod5x89ABCDEF));
+
+      const __m256i vi6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i6));
+      const __m256i vk6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      i6 += 16;
+
+      const __m256i vprod6x0123456789ABCDEF =  _mm256_mullo_epi16(vi6x0123456789ABCDEF, vk6x0123456789ABCDEF);
+      const __m128i vprod6x89ABCDEF = _mm256_extracti128_si256(vprod6x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod6x89ABCDEF));
+
+      const __m256i vi7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i7));
+      const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      i7 += 16;
+
+      const __m256i vprod7x0123456789ABCDEF =  _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF);
+      const __m128i vprod7x89ABCDEF = _mm256_extracti128_si256(vprod7x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod7x89ABCDEF));
+
+      const __m256i vi8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i8));
+      const __m256i vk8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      i8 += 16;
+
+      const __m256i vprod8x0123456789ABCDEF =  _mm256_mullo_epi16(vi8x0123456789ABCDEF, vk8x0123456789ABCDEF);
+      const __m128i vprod8x89ABCDEF = _mm256_extracti128_si256(vprod8x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
+
+      w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      const __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      output += 16;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+        __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+
+
+        const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0));
+        const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+
+        const __m256i vprod0x0123456789ABCDEF = _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF);
+        const __m128i vprod0x89ABCDEF = _mm256_extracti128_si256(vprod0x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod0x89ABCDEF));
+
+        const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1));
+        const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+
+        const __m256i vprod1x0123456789ABCDEF = _mm256_mullo_epi16(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF);
+        const __m128i vprod1x89ABCDEF = _mm256_extracti128_si256(vprod1x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod1x89ABCDEF));
+
+        const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2));
+        const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+
+        const __m256i vprod2x0123456789ABCDEF = _mm256_mullo_epi16(vi2x0123456789ABCDEF, vk2x0123456789ABCDEF);
+        const __m128i vprod2x89ABCDEF = _mm256_extracti128_si256(vprod2x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod2x89ABCDEF));
+
+        const __m256i vi3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i3));
+        const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+
+        const __m256i vprod3x0123456789ABCDEF = _mm256_mullo_epi16(vi3x0123456789ABCDEF, vk3x0123456789ABCDEF);
+        const __m128i vprod3x89ABCDEF = _mm256_extracti128_si256(vprod3x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod3x89ABCDEF));
+
+        const __m256i vi4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i4));
+        const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+
+        const __m256i vprod4x0123456789ABCDEF = _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF);
+        const __m128i vprod4x89ABCDEF = _mm256_extracti128_si256(vprod4x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod4x89ABCDEF));
+
+        const __m256i vi5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i5));
+        const __m256i vk5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+
+        const __m256i vprod5x0123456789ABCDEF = _mm256_mullo_epi16(vi5x0123456789ABCDEF, vk5x0123456789ABCDEF);
+        const __m128i vprod5x89ABCDEF = _mm256_extracti128_si256(vprod5x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod5x89ABCDEF));
+
+        const __m256i vi6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i6));
+        const __m256i vk6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+
+        const __m256i vprod6x0123456789ABCDEF = _mm256_mullo_epi16(vi6x0123456789ABCDEF, vk6x0123456789ABCDEF);
+        const __m128i vprod6x89ABCDEF = _mm256_extracti128_si256(vprod6x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod6x89ABCDEF));
+
+        const __m256i vi7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i7));
+        const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+
+        const __m256i vprod7x0123456789ABCDEF = _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF);
+        const __m128i vprod7x89ABCDEF = _mm256_extracti128_si256(vprod7x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod7x89ABCDEF));
+
+        const __m256i vi8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i8));
+        const __m256i vk8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+
+        const __m256i vprod8x0123456789ABCDEF = _mm256_mullo_epi16(vi8x0123456789ABCDEF, vk8x0123456789ABCDEF);
+        const __m128i vprod8x89ABCDEF = _mm256_extracti128_si256(vprod8x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
+
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+
+        const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+        vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+        vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+        __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc89ABCDEF), _mm256_extracti128_si256(vacc89ABCDEF, 1)), voutput_zero_point);
+
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+
+        __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
+        vout0123456789ABCDEF = _mm_min_epi8(_mm_max_epi8(vout0123456789ABCDEF, voutput_min), voutput_max);
+
+        if (c & 8) {
+          _mm_storel_epi64((__m128i*) output, vout0123456789ABCDEF);
+          vout0123456789ABCDEF = _mm_unpackhi_epi64(vout0123456789ABCDEF, vout0123456789ABCDEF);
+          output += 8;
+        }
+        if (c & 4) {
+          *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456789ABCDEF);
+          vout0123456789ABCDEF = _mm_srli_epi64(vout0123456789ABCDEF, 32);
+          output += 4;
+        }
+        if (c & 2) {
+          *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456789ABCDEF, 0);
+          vout0123456789ABCDEF = _mm_srli_epi32(vout0123456789ABCDEF, 16);
+          output += 2;
+        }
+        if (c & 1) {
+          *output = (int8_t) _mm_extract_epi8(vout0123456789ABCDEF, 0);
+          output += 1;
+        }
+      }
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul32.c
new file mode 100644
index 0000000..9b34f93
--- /dev/null
+++ b/src/qs8-dwconv/gen/up16x9-minmax-fp32-avx2-mul32.c
@@ -0,0 +1,295 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul32.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 16; c -= 16) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+
+
+      const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+      const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      const __m256i vi0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 8)));
+      const __m256i vk0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+      i0 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi0x89ABCDEF, vk0x89ABCDEF));
+
+      const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+      const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      const __m256i vi1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 8)));
+      const __m256i vk1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+      i1 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi1x89ABCDEF, vk1x89ABCDEF));
+
+      const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+      const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      const __m256i vi2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 8)));
+      const __m256i vk2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+      i2 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi2x89ABCDEF, vk2x89ABCDEF));
+
+      const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+      const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      const __m256i vi3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 8)));
+      const __m256i vk3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+      i3 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi3x89ABCDEF, vk3x89ABCDEF));
+
+      const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+      const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      const __m256i vi4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 8)));
+      const __m256i vk4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 72 * sizeof(int8_t))));
+      i4 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi4x89ABCDEF, vk4x89ABCDEF));
+
+      const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+      const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      const __m256i vi5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 8)));
+      const __m256i vk5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 88 * sizeof(int8_t))));
+      i5 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi5x89ABCDEF, vk5x89ABCDEF));
+
+      const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+      const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      const __m256i vi6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 8)));
+      const __m256i vk6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 104 * sizeof(int8_t))));
+      i6 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi6x89ABCDEF, vk6x89ABCDEF));
+
+      const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+      const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      const __m256i vi7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 8)));
+      const __m256i vk7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 120 * sizeof(int8_t))));
+      i7 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi7x89ABCDEF, vk7x89ABCDEF));
+
+      const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+      const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      const __m256i vi8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 8)));
+      const __m256i vk8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 16 * sizeof(int32_t) + 136 * sizeof(int8_t))));
+      i8 += 16;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi8x89ABCDEF, vk8x89ABCDEF));
+
+      w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
+
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      output += 16;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      const int8_t* k = (const int8_t*) ((uintptr_t) w + 16 * sizeof(int32_t));
+      do {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+        const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+        const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) k));
+        i0 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+        const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+        const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 16)));
+        i1 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+        const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+        const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 32)));
+        i2 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+        const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+        const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 48)));
+        i3 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+        const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+        const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 64)));
+        i4 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+        const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+        const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 80)));
+        i5 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+        const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+        const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 96)));
+        i6 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+        const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+        const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 112)));
+        i7 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+        const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+        const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 128)));
+        i8 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+        w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+        k += 8;
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, _mm256_load_ps(params->fp32_avx2.scale));
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+        __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+
+        if XNN_LIKELY(c >= 8) {
+          _mm_storel_epi64((__m128i*) output, vout0123456701234567);
+          output += 8;
+          c -= 8;
+        } else {
+          if (c & 4) {
+            *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
+            vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32);
+            output += 4;
+          }
+          if (c & 2) {
+            *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0);
+            vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16);
+            output += 2;
+          }
+          if (c & 1) {
+            *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0);
+            output += 1;
+          }
+          c = 0;
+        }
+      } while (c != 0);
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul16.c
index 70819c4..522eafb 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul16.c
@@ -277,8 +277,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -307,7 +307,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -317,8 +317,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -328,17 +328,17 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
 
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
 
       _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
@@ -471,8 +471,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -491,27 +491,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 8) {
           _mm_storel_epi64((__m128i*) output, vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul32.c
index 5ba8e77..3132d70 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx-mul32.c
@@ -224,8 +224,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -254,7 +254,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -264,8 +264,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -275,12 +275,12 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -351,8 +351,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -364,21 +364,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c
index 5587b9e..5476ae8 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul16.c
@@ -166,9 +166,8 @@
       vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
-
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -186,26 +185,26 @@
       const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
       const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
         _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89ABCDEF, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       const __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
 
@@ -291,8 +290,8 @@
         vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
 
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
         const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -310,25 +309,25 @@
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
         const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
         const __m256i vrem89ABCDEF =
           _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
         vacc89ABCDEF =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89ABCDEF, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
         __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc89ABCDEF), _mm256_extracti128_si256(vacc89ABCDEF, 1)), voutput_zero_point);
 
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
 
         __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
         vout0123456789ABCDEF = _mm_min_epi8(_mm_max_epi8(vout0123456789ABCDEF, voutput_min), voutput_max);
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c
index 2b3af4e..a52e31a 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx2-mul32.c
@@ -167,8 +167,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -186,26 +186,26 @@
       const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
       const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
         _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89ABCDEF, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
 
@@ -275,8 +275,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -288,21 +288,21 @@
 
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
         vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
         vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx512skx-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx512skx-mul32.c
index 2d92800..c6355b6 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx512skx-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-avx512skx-mul32.c
@@ -31,14 +31,14 @@
   assert(output_width != 0);
 
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
-  const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_zero_point));
-  const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_min));
-  const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
+  const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point));
+  const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+  const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
   do {
     const int8_t* i0 = input[0];
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-neon-mul16.c
index d21e9fc..d9159c1 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-neon-mul16.c
@@ -29,12 +29,12 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
-  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+  const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
+  const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
-  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
-  const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-  const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
+  const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+  const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse2-mul16.c
index 17f7b55..de2556b 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse2-mul16.c
@@ -277,8 +277,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -344,7 +344,7 @@
       const __m128i vq31prod89AB = _mm_shuffle_epi32(vq31prod8A9B, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prodCDEF = _mm_shuffle_epi32(vq31prodCEDF, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -354,8 +354,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -365,15 +365,15 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
       vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
       vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max);
 
@@ -510,8 +510,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -550,24 +550,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul16.c
index 6249771..3c6377c 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul16.c
@@ -277,8 +277,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -307,7 +307,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -317,8 +317,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -328,17 +328,17 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
 
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
 
       _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
@@ -471,8 +471,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -491,27 +491,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 8) {
           _mm_storel_epi64((__m128i*) output, vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul32.c
index 1cfa1dd..16c8cc1 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-sse41-mul32.c
@@ -224,8 +224,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -254,7 +254,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -264,8 +264,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -275,12 +275,12 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -351,8 +351,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -364,21 +364,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-ssse3-mul16.c
index ed0f6d7..5229fe7 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-ssse3-mul16.c
@@ -277,8 +277,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -344,7 +344,7 @@
       const __m128i vq31prod89AB = _mm_shuffle_epi32(vq31prod8A9B, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prodCDEF = _mm_shuffle_epi32(vq31prodCEDF, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -354,8 +354,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -365,15 +365,15 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
       vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
       vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max);
 
@@ -510,8 +510,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -550,24 +550,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-wasmsimd-mul16.c
index 04f51d4..2887a36 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-wasmsimd-mul16.c
@@ -228,8 +228,8 @@
       const v128_t vaccCD = wasm_v32x4_shuffle(vaccCDEF, vsignCDEF, 0, 4, 1, 5);
       const v128_t vaccEF = wasm_v32x4_shuffle(vaccCDEF, vsignCDEF, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -244,25 +244,25 @@
       const v128_t vq31prod89AB = wasm_v32x4_shuffle(vprod89, vprodAB, 1, 3, 5, 7);
       const v128_t vq31prodCDEF = wasm_v32x4_shuffle(vprodCD, vprodEF, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
       const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vq31prod89AB, vremainder_mask), wasm_i32x4_shr(vq31prod89AB, 31));
       const v128_t vremCDEF = wasm_i32x4_add(wasm_v128_and(vq31prodCDEF, vremainder_mask), wasm_i32x4_shr(vq31prodCDEF, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
       vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod89AB, vshift), wasm_i32x4_gt(vrem89AB, vthreshold));
       vaccCDEF = wasm_i32x4_sub(wasm_i32x4_shr(vq31prodCDEF, vshift), wasm_i32x4_gt(vremCDEF, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
       v128_t vout89ABCDEF = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456789ABCDEF = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout89ABCDEF), voutput_min), voutput_max);
 
       wasm_v128_store(output, vout0123456789ABCDEF);
@@ -367,8 +367,8 @@
       const v128_t vacc45 = wasm_v32x4_shuffle(vacc4567, vsign4567, 0, 4, 1, 5);
       const v128_t vacc67 = wasm_v32x4_shuffle(vacc4567, vsign4567, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -377,20 +377,20 @@
       const v128_t vq31prod0123 = wasm_v32x4_shuffle(vprod01, vprod23, 1, 3, 5, 7);
       const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);
 
       if XNN_LIKELY(c >= 8) {
diff --git a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-xop-mul32.c
index 5dba590..21573ca 100644
--- a/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up16x9-minmax-gemmlowp-xop-mul32.c
@@ -229,8 +229,8 @@
 
       w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t) + 144 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -259,7 +259,7 @@
       const __m128i vq31prod89AB = _mm_blend_epi16(vq31prod8A, vq31prod9B, 0xCC);
       const __m128i vq31prodCDEF = _mm_blend_epi16(vq31prodCE, vq31prodDF, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -269,8 +269,8 @@
       const __m128i vremCDEF =
         _mm_add_epi32(_mm_and_si128(vq31prodCDEF, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodCDEF));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -280,12 +280,12 @@
       vaccCDEF =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodCDEF, vshift), _mm_cmpgt_epi32(vremCDEF, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -356,8 +356,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -369,21 +369,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up1x25-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up1x25-minmax-gemmlowp-scalar.c
index 2923490..f472472 100644
--- a/src/qs8-dwconv/gen/up1x25-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up1x25-minmax-gemmlowp-scalar.c
@@ -28,14 +28,14 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32_t vmultiplier = params->scalar.multiplier;
+  const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
   const int64_t vq31rounding = INT64_C(0x40000000);
-  const int32_t vremainder_mask = params->scalar.remainder_mask;
-  const uint32_t vshift = params->scalar.shift;
-  const int32_t vremainder_threshold = params->scalar.remainder_threshold;
-  const int32_t vout_min = params->scalar.output_min_less_zero_point;
-  const int32_t vout_max = params->scalar.output_max_less_zero_point;
-  const int32_t voutput_zero_point = params->scalar.output_zero_point;
+  const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
+  const uint32_t vshift = params->gemmlowp_scalar.shift;
+  const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
+  const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
+  const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
+  const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up1x9-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up1x9-minmax-gemmlowp-scalar.c
index 9980583..0279786 100644
--- a/src/qs8-dwconv/gen/up1x9-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up1x9-minmax-gemmlowp-scalar.c
@@ -28,14 +28,14 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32_t vmultiplier = params->scalar.multiplier;
+  const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
   const int64_t vq31rounding = INT64_C(0x40000000);
-  const int32_t vremainder_mask = params->scalar.remainder_mask;
-  const uint32_t vshift = params->scalar.shift;
-  const int32_t vremainder_threshold = params->scalar.remainder_threshold;
-  const int32_t vout_min = params->scalar.output_min_less_zero_point;
-  const int32_t vout_max = params->scalar.output_max_less_zero_point;
-  const int32_t voutput_zero_point = params->scalar.output_zero_point;
+  const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
+  const uint32_t vshift = params->gemmlowp_scalar.shift;
+  const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
+  const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
+  const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
+  const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-fp32-avx2-mul32.c b/src/qs8-dwconv/gen/up24x25-minmax-fp32-avx2-mul32.c
new file mode 100644
index 0000000..08f6a46
--- /dev/null
+++ b/src/qs8-dwconv/gen/up24x25-minmax-fp32-avx2-mul32.c
@@ -0,0 +1,699 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul32.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    const int8_t* i9 = input[9];
+    assert(i9 != NULL);
+    if XNN_UNPREDICTABLE(i9 != zero) {
+      i9 = (const int8_t*) ((uintptr_t) i9 + input_offset);
+    }
+    const int8_t* i10 = input[10];
+    assert(i10 != NULL);
+    if XNN_UNPREDICTABLE(i10 != zero) {
+      i10 = (const int8_t*) ((uintptr_t) i10 + input_offset);
+    }
+    const int8_t* i11 = input[11];
+    assert(i11 != NULL);
+    if XNN_UNPREDICTABLE(i11 != zero) {
+      i11 = (const int8_t*) ((uintptr_t) i11 + input_offset);
+    }
+    const int8_t* i12 = input[12];
+    assert(i12 != NULL);
+    if XNN_UNPREDICTABLE(i12 != zero) {
+      i12 = (const int8_t*) ((uintptr_t) i12 + input_offset);
+    }
+    const int8_t* i13 = input[13];
+    assert(i13 != NULL);
+    if XNN_UNPREDICTABLE(i13 != zero) {
+      i13 = (const int8_t*) ((uintptr_t) i13 + input_offset);
+    }
+    const int8_t* i14 = input[14];
+    assert(i14 != NULL);
+    if XNN_UNPREDICTABLE(i14 != zero) {
+      i14 = (const int8_t*) ((uintptr_t) i14 + input_offset);
+    }
+    const int8_t* i15 = input[15];
+    assert(i15 != NULL);
+    if XNN_UNPREDICTABLE(i15 != zero) {
+      i15 = (const int8_t*) ((uintptr_t) i15 + input_offset);
+    }
+    const int8_t* i16 = input[16];
+    assert(i16 != NULL);
+    if XNN_UNPREDICTABLE(i16 != zero) {
+      i16 = (const int8_t*) ((uintptr_t) i16 + input_offset);
+    }
+    const int8_t* i17 = input[17];
+    assert(i17 != NULL);
+    if XNN_UNPREDICTABLE(i17 != zero) {
+      i17 = (const int8_t*) ((uintptr_t) i17 + input_offset);
+    }
+    const int8_t* i18 = input[18];
+    assert(i18 != NULL);
+    if XNN_UNPREDICTABLE(i18 != zero) {
+      i18 = (const int8_t*) ((uintptr_t) i18 + input_offset);
+    }
+    const int8_t* i19 = input[19];
+    assert(i19 != NULL);
+    if XNN_UNPREDICTABLE(i19 != zero) {
+      i19 = (const int8_t*) ((uintptr_t) i19 + input_offset);
+    }
+    const int8_t* i20 = input[20];
+    assert(i20 != NULL);
+    if XNN_UNPREDICTABLE(i20 != zero) {
+      i20 = (const int8_t*) ((uintptr_t) i20 + input_offset);
+    }
+    const int8_t* i21 = input[21];
+    assert(i21 != NULL);
+    if XNN_UNPREDICTABLE(i21 != zero) {
+      i21 = (const int8_t*) ((uintptr_t) i21 + input_offset);
+    }
+    const int8_t* i22 = input[22];
+    assert(i22 != NULL);
+    if XNN_UNPREDICTABLE(i22 != zero) {
+      i22 = (const int8_t*) ((uintptr_t) i22 + input_offset);
+    }
+    const int8_t* i23 = input[23];
+    assert(i23 != NULL);
+    if XNN_UNPREDICTABLE(i23 != zero) {
+      i23 = (const int8_t*) ((uintptr_t) i23 + input_offset);
+    }
+    const int8_t* i24 = input[24];
+    assert(i24 != NULL);
+    if XNN_UNPREDICTABLE(i24 != zero) {
+      i24 = (const int8_t*) ((uintptr_t) i24 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 24; c -= 24) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+      __m256i vaccGHIJKLMN = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 16 * sizeof(int32_t)));
+
+
+      const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+      const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      const __m256i vi0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 8)));
+      const __m256i vk0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+      const __m256i vi0xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 16)));
+      const __m256i vk0xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      i0 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi0x89ABCDEF, vk0x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi0xGHIJKLMN, vk0xGHIJKLMN));
+
+      const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+      const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+      const __m256i vi1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 8)));
+      const __m256i vk1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      const __m256i vi1xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 16)));
+      const __m256i vk1xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+      i1 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi1x89ABCDEF, vk1x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi1xGHIJKLMN, vk1xGHIJKLMN));
+
+      const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+      const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      const __m256i vi2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 8)));
+      const __m256i vk2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+      const __m256i vi2xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 16)));
+      const __m256i vk2xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      i2 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi2x89ABCDEF, vk2x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi2xGHIJKLMN, vk2xGHIJKLMN));
+
+      const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+      const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 72 * sizeof(int8_t))));
+      const __m256i vi3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 8)));
+      const __m256i vk3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      const __m256i vi3xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 16)));
+      const __m256i vk3xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 88 * sizeof(int8_t))));
+      i3 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi3x89ABCDEF, vk3x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi3xGHIJKLMN, vk3xGHIJKLMN));
+
+      const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+      const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      const __m256i vi4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 8)));
+      const __m256i vk4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 104 * sizeof(int8_t))));
+      const __m256i vi4xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 16)));
+      const __m256i vk4xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      i4 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi4x89ABCDEF, vk4x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi4xGHIJKLMN, vk4xGHIJKLMN));
+
+      const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+      const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 120 * sizeof(int8_t))));
+      const __m256i vi5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 8)));
+      const __m256i vk5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      const __m256i vi5xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 16)));
+      const __m256i vk5xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 136 * sizeof(int8_t))));
+      i5 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi5x89ABCDEF, vk5x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi5xGHIJKLMN, vk5xGHIJKLMN));
+
+      const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+      const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+      const __m256i vi6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 8)));
+      const __m256i vk6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 152 * sizeof(int8_t))));
+      const __m256i vi6xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 16)));
+      const __m256i vk6xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+      i6 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi6x89ABCDEF, vk6x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi6xGHIJKLMN, vk6xGHIJKLMN));
+
+      const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+      const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 168 * sizeof(int8_t))));
+      const __m256i vi7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 8)));
+      const __m256i vk7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+      const __m256i vi7xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 16)));
+      const __m256i vk7xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 184 * sizeof(int8_t))));
+      i7 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi7x89ABCDEF, vk7x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi7xGHIJKLMN, vk7xGHIJKLMN));
+
+      const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+      const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+      const __m256i vi8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 8)));
+      const __m256i vk8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 200 * sizeof(int8_t))));
+      const __m256i vi8xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 16)));
+      const __m256i vk8xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 208 * sizeof(int8_t))));
+      i8 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi8x89ABCDEF, vk8x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi8xGHIJKLMN, vk8xGHIJKLMN));
+
+      const __m256i vi9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i9));
+      const __m256i vk9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t))));
+      const __m256i vi9x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i9 + 8)));
+      const __m256i vk9x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 224 * sizeof(int8_t))));
+      const __m256i vi9xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i9 + 16)));
+      const __m256i vk9xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 232 * sizeof(int8_t))));
+      i9 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi9x01234567, vk9x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi9x89ABCDEF, vk9x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi9xGHIJKLMN, vk9xGHIJKLMN));
+
+      const __m256i vi10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i10));
+      const __m256i vk10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 240 * sizeof(int8_t))));
+      const __m256i vi10x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i10 + 8)));
+      const __m256i vk10x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 248 * sizeof(int8_t))));
+      const __m256i vi10xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i10 + 16)));
+      const __m256i vk10xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 256 * sizeof(int8_t))));
+      i10 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi10x01234567, vk10x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi10x89ABCDEF, vk10x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi10xGHIJKLMN, vk10xGHIJKLMN));
+
+      const __m256i vi11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i11));
+      const __m256i vk11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 264 * sizeof(int8_t))));
+      const __m256i vi11x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i11 + 8)));
+      const __m256i vk11x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 272 * sizeof(int8_t))));
+      const __m256i vi11xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i11 + 16)));
+      const __m256i vk11xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 280 * sizeof(int8_t))));
+      i11 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi11x01234567, vk11x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi11x89ABCDEF, vk11x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi11xGHIJKLMN, vk11xGHIJKLMN));
+
+      const __m256i vi12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i12));
+      const __m256i vk12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 288 * sizeof(int8_t))));
+      const __m256i vi12x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i12 + 8)));
+      const __m256i vk12x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 296 * sizeof(int8_t))));
+      const __m256i vi12xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i12 + 16)));
+      const __m256i vk12xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 304 * sizeof(int8_t))));
+      i12 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi12x01234567, vk12x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi12x89ABCDEF, vk12x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi12xGHIJKLMN, vk12xGHIJKLMN));
+
+      const __m256i vi13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i13));
+      const __m256i vk13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 312 * sizeof(int8_t))));
+      const __m256i vi13x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i13 + 8)));
+      const __m256i vk13x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 320 * sizeof(int8_t))));
+      const __m256i vi13xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i13 + 16)));
+      const __m256i vk13xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 328 * sizeof(int8_t))));
+      i13 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi13x01234567, vk13x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi13x89ABCDEF, vk13x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi13xGHIJKLMN, vk13xGHIJKLMN));
+
+      const __m256i vi14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i14));
+      const __m256i vk14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 336 * sizeof(int8_t))));
+      const __m256i vi14x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i14 + 8)));
+      const __m256i vk14x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 344 * sizeof(int8_t))));
+      const __m256i vi14xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i14 + 16)));
+      const __m256i vk14xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 352 * sizeof(int8_t))));
+      i14 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi14x01234567, vk14x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi14x89ABCDEF, vk14x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi14xGHIJKLMN, vk14xGHIJKLMN));
+
+      const __m256i vi15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i15));
+      const __m256i vk15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 360 * sizeof(int8_t))));
+      const __m256i vi15x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i15 + 8)));
+      const __m256i vk15x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 368 * sizeof(int8_t))));
+      const __m256i vi15xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i15 + 16)));
+      const __m256i vk15xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 376 * sizeof(int8_t))));
+      i15 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi15x01234567, vk15x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi15x89ABCDEF, vk15x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi15xGHIJKLMN, vk15xGHIJKLMN));
+
+      const __m256i vi16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i16));
+      const __m256i vk16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 384 * sizeof(int8_t))));
+      const __m256i vi16x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i16 + 8)));
+      const __m256i vk16x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 392 * sizeof(int8_t))));
+      const __m256i vi16xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i16 + 16)));
+      const __m256i vk16xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 400 * sizeof(int8_t))));
+      i16 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi16x01234567, vk16x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi16x89ABCDEF, vk16x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi16xGHIJKLMN, vk16xGHIJKLMN));
+
+      const __m256i vi17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i17));
+      const __m256i vk17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 408 * sizeof(int8_t))));
+      const __m256i vi17x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i17 + 8)));
+      const __m256i vk17x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 416 * sizeof(int8_t))));
+      const __m256i vi17xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i17 + 16)));
+      const __m256i vk17xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 424 * sizeof(int8_t))));
+      i17 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi17x01234567, vk17x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi17x89ABCDEF, vk17x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi17xGHIJKLMN, vk17xGHIJKLMN));
+
+      const __m256i vi18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i18));
+      const __m256i vk18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 432 * sizeof(int8_t))));
+      const __m256i vi18x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i18 + 8)));
+      const __m256i vk18x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 440 * sizeof(int8_t))));
+      const __m256i vi18xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i18 + 16)));
+      const __m256i vk18xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 448 * sizeof(int8_t))));
+      i18 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi18x01234567, vk18x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi18x89ABCDEF, vk18x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi18xGHIJKLMN, vk18xGHIJKLMN));
+
+      const __m256i vi19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i19));
+      const __m256i vk19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 456 * sizeof(int8_t))));
+      const __m256i vi19x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i19 + 8)));
+      const __m256i vk19x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 464 * sizeof(int8_t))));
+      const __m256i vi19xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i19 + 16)));
+      const __m256i vk19xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 472 * sizeof(int8_t))));
+      i19 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi19x01234567, vk19x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi19x89ABCDEF, vk19x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi19xGHIJKLMN, vk19xGHIJKLMN));
+
+      const __m256i vi20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i20));
+      const __m256i vk20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 480 * sizeof(int8_t))));
+      const __m256i vi20x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i20 + 8)));
+      const __m256i vk20x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 488 * sizeof(int8_t))));
+      const __m256i vi20xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i20 + 16)));
+      const __m256i vk20xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 496 * sizeof(int8_t))));
+      i20 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi20x01234567, vk20x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi20x89ABCDEF, vk20x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi20xGHIJKLMN, vk20xGHIJKLMN));
+
+      const __m256i vi21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i21));
+      const __m256i vk21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 504 * sizeof(int8_t))));
+      const __m256i vi21x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i21 + 8)));
+      const __m256i vk21x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 512 * sizeof(int8_t))));
+      const __m256i vi21xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i21 + 16)));
+      const __m256i vk21xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 520 * sizeof(int8_t))));
+      i21 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi21x01234567, vk21x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi21x89ABCDEF, vk21x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi21xGHIJKLMN, vk21xGHIJKLMN));
+
+      const __m256i vi22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i22));
+      const __m256i vk22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 528 * sizeof(int8_t))));
+      const __m256i vi22x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i22 + 8)));
+      const __m256i vk22x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 536 * sizeof(int8_t))));
+      const __m256i vi22xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i22 + 16)));
+      const __m256i vk22xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 544 * sizeof(int8_t))));
+      i22 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi22x01234567, vk22x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi22x89ABCDEF, vk22x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi22xGHIJKLMN, vk22xGHIJKLMN));
+
+      const __m256i vi23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i23));
+      const __m256i vk23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 552 * sizeof(int8_t))));
+      const __m256i vi23x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i23 + 8)));
+      const __m256i vk23x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 560 * sizeof(int8_t))));
+      const __m256i vi23xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i23 + 16)));
+      const __m256i vk23xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 568 * sizeof(int8_t))));
+      i23 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi23x01234567, vk23x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi23x89ABCDEF, vk23x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi23xGHIJKLMN, vk23xGHIJKLMN));
+
+      const __m256i vi24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i24));
+      const __m256i vk24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 576 * sizeof(int8_t))));
+      const __m256i vi24x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i24 + 8)));
+      const __m256i vk24x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 584 * sizeof(int8_t))));
+      const __m256i vi24xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i24 + 16)));
+      const __m256i vk24xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 592 * sizeof(int8_t))));
+      i24 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi24x01234567, vk24x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi24x89ABCDEF, vk24x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi24xGHIJKLMN, vk24xGHIJKLMN));
+
+      w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 600 * sizeof(int8_t));
+
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+      __m256 vscaledGHIJKLMN = _mm256_cvtepi32_ps(vaccGHIJKLMN);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+      vscaledGHIJKLMN = _mm256_mul_ps(vscaledGHIJKLMN, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+      vaccGHIJKLMN = _mm256_cvtps_epi32(vscaledGHIJKLMN);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+      __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vaccGHIJKLMN), _mm256_extracti128_si256(vaccGHIJKLMN, 1)), _mm256_castsi256_si128(voutput_zero_point));
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+      __m128i voutGHIJKLMNGHIJKLMN = _mm_packs_epi16(voutGHIJKLMN, voutGHIJKLMN);
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+      voutGHIJKLMNGHIJKLMN = _mm_max_epi8(voutGHIJKLMNGHIJKLMN, voutput_min);
+      voutGHIJKLMNGHIJKLMN = _mm_min_epi8(voutGHIJKLMNGHIJKLMN, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      _mm_storel_epi64((__m128i*) (output + 16), voutGHIJKLMNGHIJKLMN);
+      output += 24;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      const int8_t* k = (const int8_t*) ((uintptr_t) w + 24 * sizeof(int32_t));
+      do {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+        const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+        const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) k));
+        i0 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+        const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+        const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 24)));
+        i1 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+        const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+        const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 48)));
+        i2 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+        const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+        const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 72)));
+        i3 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+        const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+        const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 96)));
+        i4 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+        const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+        const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 120)));
+        i5 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+        const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+        const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 144)));
+        i6 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+        const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+        const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 168)));
+        i7 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+        const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+        const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 192)));
+        i8 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+        const __m256i vi9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i9));
+        const __m256i vk9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 216)));
+        i9 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi9x01234567, vk9x01234567));
+
+        const __m256i vi10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i10));
+        const __m256i vk10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 240)));
+        i10 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi10x01234567, vk10x01234567));
+
+        const __m256i vi11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i11));
+        const __m256i vk11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 264)));
+        i11 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi11x01234567, vk11x01234567));
+
+        const __m256i vi12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i12));
+        const __m256i vk12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 288)));
+        i12 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi12x01234567, vk12x01234567));
+
+        const __m256i vi13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i13));
+        const __m256i vk13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 312)));
+        i13 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi13x01234567, vk13x01234567));
+
+        const __m256i vi14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i14));
+        const __m256i vk14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 336)));
+        i14 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi14x01234567, vk14x01234567));
+
+        const __m256i vi15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i15));
+        const __m256i vk15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 360)));
+        i15 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi15x01234567, vk15x01234567));
+
+        const __m256i vi16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i16));
+        const __m256i vk16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 384)));
+        i16 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi16x01234567, vk16x01234567));
+
+        const __m256i vi17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i17));
+        const __m256i vk17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 408)));
+        i17 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi17x01234567, vk17x01234567));
+
+        const __m256i vi18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i18));
+        const __m256i vk18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 432)));
+        i18 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi18x01234567, vk18x01234567));
+
+        const __m256i vi19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i19));
+        const __m256i vk19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 456)));
+        i19 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi19x01234567, vk19x01234567));
+
+        const __m256i vi20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i20));
+        const __m256i vk20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 480)));
+        i20 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi20x01234567, vk20x01234567));
+
+        const __m256i vi21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i21));
+        const __m256i vk21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 504)));
+        i21 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi21x01234567, vk21x01234567));
+
+        const __m256i vi22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i22));
+        const __m256i vk22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 528)));
+        i22 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi22x01234567, vk22x01234567));
+
+        const __m256i vi23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i23));
+        const __m256i vk23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 552)));
+        i23 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi23x01234567, vk23x01234567));
+
+        const __m256i vi24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i24));
+        const __m256i vk24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 576)));
+        i24 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi24x01234567, vk24x01234567));
+
+        w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+        k += 8;
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, _mm256_load_ps(params->fp32_avx2.scale));
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+        __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+
+        if XNN_LIKELY(c >= 8) {
+          _mm_storel_epi64((__m128i*) output, vout0123456701234567);
+          output += 8;
+          c -= 8;
+        } else {
+          if (c & 4) {
+            *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
+            vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32);
+            output += 4;
+          }
+          if (c & 2) {
+            *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0);
+            vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16);
+            output += 2;
+          }
+          if (c & 1) {
+            *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0);
+            output += 1;
+          }
+          c = 0;
+        }
+      } while (c != 0);
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul16.c
index 8da8409..6316096 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul16.c
@@ -895,8 +895,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 600 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -937,7 +937,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -951,8 +951,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -966,7 +966,7 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
@@ -975,11 +975,11 @@
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       __m128i voutGHIJKLMNGHIJKLMN = _mm_packs_epi16(voutGHIJKLMN, voutGHIJKLMN);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       voutGHIJKLMNGHIJKLMN = _mm_max_epi8(voutGHIJKLMNGHIJKLMN, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNGHIJKLMN = _mm_min_epi8(voutGHIJKLMNGHIJKLMN, voutput_max);
 
@@ -1322,8 +1322,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -1342,27 +1342,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 8) {
           _mm_storel_epi64((__m128i*) output, vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul32.c
index 56ef1a7..f676a89 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx-mul32.c
@@ -696,8 +696,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 600 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -738,7 +738,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -752,8 +752,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -767,13 +767,13 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -944,8 +944,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -957,21 +957,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c
index 1b127a2..bf2dab7 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-avx2-mul32.c
@@ -467,8 +467,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 600 * sizeof(int8_t));
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -492,7 +492,7 @@
       const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
       const __m256i vq31prodGHIJKLMN = _mm256_blend_epi16(vq31prodGIKM, vq31prodHJLN, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
@@ -500,8 +500,8 @@
       const __m256i vremGHIJKLMN =
         _mm256_add_epi32(_mm256_and_si256(vq31prodGHIJKLMN, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodGHIJKLMN));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
@@ -509,15 +509,15 @@
       vaccGHIJKLMN =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prodGHIJKLMN, vshift), _mm256_cmpgt_epi32(vremGHIJKLMN, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vaccGHIJKLMN), _mm256_extracti128_si256(vaccGHIJKLMN, 1)), _mm256_castsi256_si128(voutput_zero_point));
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
       __m128i voutGHIJKLMNGHIJKLMN = _mm_packs_epi16(voutGHIJKLMN, voutGHIJKLMN);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNGHIJKLMN = _mm_max_epi8(voutGHIJKLMNGHIJKLMN, voutput_min);
@@ -686,8 +686,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -699,21 +699,21 @@
 
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
         vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
         vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-neon-mul16.c
index 8644556..da8766f 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-neon-mul16.c
@@ -29,12 +29,12 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
-  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+  const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
+  const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
-  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
-  const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-  const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
+  const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+  const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse2-mul16.c
index ed180a1..52f6f3d 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse2-mul16.c
@@ -895,8 +895,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 600 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -990,7 +990,7 @@
       const __m128i vq31prodGHIJ = _mm_shuffle_epi32(vq31prodGIHJ, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prodKLMN = _mm_shuffle_epi32(vq31prodKMLN, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -1004,8 +1004,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -1019,17 +1019,17 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
       vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min);
       voutGHIJKLMN = _mm_max_epi16(voutGHIJKLMN, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
       vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max);
       voutGHIJKLMN = _mm_min_epi16(voutGHIJKLMN, voutput_max);
@@ -1377,8 +1377,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -1417,24 +1417,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul16.c
index bffa0c6..18ee6a7 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul16.c
@@ -895,8 +895,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 600 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -937,7 +937,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -951,8 +951,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -966,7 +966,7 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
@@ -975,11 +975,11 @@
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       __m128i voutGHIJKLMNGHIJKLMN = _mm_packs_epi16(voutGHIJKLMN, voutGHIJKLMN);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       voutGHIJKLMNGHIJKLMN = _mm_max_epi8(voutGHIJKLMNGHIJKLMN, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNGHIJKLMN = _mm_min_epi8(voutGHIJKLMNGHIJKLMN, voutput_max);
 
@@ -1322,8 +1322,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -1342,27 +1342,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 8) {
           _mm_storel_epi64((__m128i*) output, vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul32.c
index c0b7cc5..59a5fcd 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-sse41-mul32.c
@@ -696,8 +696,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 600 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -738,7 +738,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -752,8 +752,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -767,13 +767,13 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -944,8 +944,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -957,21 +957,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-ssse3-mul16.c
index 0262f2a..c3efb42 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-ssse3-mul16.c
@@ -895,8 +895,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 600 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -990,7 +990,7 @@
       const __m128i vq31prodGHIJ = _mm_shuffle_epi32(vq31prodGIHJ, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prodKLMN = _mm_shuffle_epi32(vq31prodKMLN, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -1004,8 +1004,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -1019,17 +1019,17 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
       vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min);
       voutGHIJKLMN = _mm_max_epi16(voutGHIJKLMN, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
       vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max);
       voutGHIJKLMN = _mm_min_epi16(voutGHIJKLMN, voutput_max);
@@ -1377,8 +1377,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -1417,24 +1417,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-wasmsimd-mul16.c
index f52a48f..3b9667d 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-wasmsimd-mul16.c
@@ -665,8 +665,8 @@
       const v128_t vaccKL = wasm_v32x4_shuffle(vaccKLMN, vsignKLMN, 0, 4, 1, 5);
       const v128_t vaccMN = wasm_v32x4_shuffle(vaccKLMN, vsignKLMN, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -687,7 +687,7 @@
       const v128_t vq31prodGHIJ = wasm_v32x4_shuffle(vprodGH, vprodIJ, 1, 3, 5, 7);
       const v128_t vq31prodKLMN = wasm_v32x4_shuffle(vprodKL, vprodMN, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
       const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vq31prod89AB, vremainder_mask), wasm_i32x4_shr(vq31prod89AB, 31));
@@ -695,8 +695,8 @@
       const v128_t vremGHIJ = wasm_i32x4_add(wasm_v128_and(vq31prodGHIJ, vremainder_mask), wasm_i32x4_shr(vq31prodGHIJ, 31));
       const v128_t vremKLMN = wasm_i32x4_add(wasm_v128_and(vq31prodKLMN, vremainder_mask), wasm_i32x4_shr(vq31prodKLMN, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
       vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod89AB, vshift), wasm_i32x4_gt(vrem89AB, vthreshold));
@@ -704,13 +704,13 @@
       vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vq31prodGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vthreshold));
       vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vq31prodKLMN, vshift), wasm_i32x4_gt(vremKLMN, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
       v128_t vout89ABCDEF = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc89AB, vaccCDEF), voutput_zero_point);
       v128_t voutGHIJKLMN = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456789ABCDEF = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout89ABCDEF), voutput_min), voutput_max);
       v128_t voutGHIJKLMNGHIJKLMN = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(voutGHIJKLMN, voutGHIJKLMN), voutput_min), voutput_max);
 
@@ -961,8 +961,8 @@
       const v128_t vacc45 = wasm_v32x4_shuffle(vacc4567, vsign4567, 0, 4, 1, 5);
       const v128_t vacc67 = wasm_v32x4_shuffle(vacc4567, vsign4567, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -971,20 +971,20 @@
       const v128_t vq31prod0123 = wasm_v32x4_shuffle(vprod01, vprod23, 1, 3, 5, 7);
       const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);
 
       if XNN_LIKELY(c >= 8) {
diff --git a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-xop-mul32.c
index cc184a9..a7caee2 100644
--- a/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up24x25-minmax-gemmlowp-xop-mul32.c
@@ -701,8 +701,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 600 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -743,7 +743,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -757,8 +757,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -772,13 +772,13 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -949,8 +949,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -962,21 +962,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-fp32-avx2-mul32.c b/src/qs8-dwconv/gen/up24x9-minmax-fp32-avx2-mul32.c
new file mode 100644
index 0000000..c10fa47
--- /dev/null
+++ b/src/qs8-dwconv/gen/up24x9-minmax-fp32-avx2-mul32.c
@@ -0,0 +1,331 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul32.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 24; c -= 24) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+      __m256i vaccGHIJKLMN = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 16 * sizeof(int32_t)));
+
+
+      const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+      const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      const __m256i vi0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 8)));
+      const __m256i vk0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+      const __m256i vi0xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 16)));
+      const __m256i vk0xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      i0 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi0x89ABCDEF, vk0x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi0xGHIJKLMN, vk0xGHIJKLMN));
+
+      const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+      const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+      const __m256i vi1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 8)));
+      const __m256i vk1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      const __m256i vi1xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 16)));
+      const __m256i vk1xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+      i1 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi1x89ABCDEF, vk1x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi1xGHIJKLMN, vk1xGHIJKLMN));
+
+      const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+      const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      const __m256i vi2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 8)));
+      const __m256i vk2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+      const __m256i vi2xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 16)));
+      const __m256i vk2xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      i2 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi2x89ABCDEF, vk2x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi2xGHIJKLMN, vk2xGHIJKLMN));
+
+      const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+      const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 72 * sizeof(int8_t))));
+      const __m256i vi3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 8)));
+      const __m256i vk3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      const __m256i vi3xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 16)));
+      const __m256i vk3xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 88 * sizeof(int8_t))));
+      i3 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi3x89ABCDEF, vk3x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi3xGHIJKLMN, vk3xGHIJKLMN));
+
+      const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+      const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      const __m256i vi4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 8)));
+      const __m256i vk4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 104 * sizeof(int8_t))));
+      const __m256i vi4xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 16)));
+      const __m256i vk4xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      i4 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi4x89ABCDEF, vk4x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi4xGHIJKLMN, vk4xGHIJKLMN));
+
+      const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+      const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 120 * sizeof(int8_t))));
+      const __m256i vi5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 8)));
+      const __m256i vk5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      const __m256i vi5xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 16)));
+      const __m256i vk5xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 136 * sizeof(int8_t))));
+      i5 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi5x89ABCDEF, vk5x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi5xGHIJKLMN, vk5xGHIJKLMN));
+
+      const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+      const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+      const __m256i vi6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 8)));
+      const __m256i vk6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 152 * sizeof(int8_t))));
+      const __m256i vi6xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 16)));
+      const __m256i vk6xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+      i6 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi6x89ABCDEF, vk6x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi6xGHIJKLMN, vk6xGHIJKLMN));
+
+      const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+      const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 168 * sizeof(int8_t))));
+      const __m256i vi7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 8)));
+      const __m256i vk7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+      const __m256i vi7xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 16)));
+      const __m256i vk7xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 184 * sizeof(int8_t))));
+      i7 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi7x89ABCDEF, vk7x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi7xGHIJKLMN, vk7xGHIJKLMN));
+
+      const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+      const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+      const __m256i vi8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 8)));
+      const __m256i vk8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 200 * sizeof(int8_t))));
+      const __m256i vi8xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 16)));
+      const __m256i vk8xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 24 * sizeof(int32_t) + 208 * sizeof(int8_t))));
+      i8 += 24;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi8x89ABCDEF, vk8x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi8xGHIJKLMN, vk8xGHIJKLMN));
+
+      w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t));
+
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+      __m256 vscaledGHIJKLMN = _mm256_cvtepi32_ps(vaccGHIJKLMN);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+      vscaledGHIJKLMN = _mm256_mul_ps(vscaledGHIJKLMN, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+      vaccGHIJKLMN = _mm256_cvtps_epi32(vscaledGHIJKLMN);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+      __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vaccGHIJKLMN), _mm256_extracti128_si256(vaccGHIJKLMN, 1)), _mm256_castsi256_si128(voutput_zero_point));
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+      __m128i voutGHIJKLMNGHIJKLMN = _mm_packs_epi16(voutGHIJKLMN, voutGHIJKLMN);
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+      voutGHIJKLMNGHIJKLMN = _mm_max_epi8(voutGHIJKLMNGHIJKLMN, voutput_min);
+      voutGHIJKLMNGHIJKLMN = _mm_min_epi8(voutGHIJKLMNGHIJKLMN, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      _mm_storel_epi64((__m128i*) (output + 16), voutGHIJKLMNGHIJKLMN);
+      output += 24;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      const int8_t* k = (const int8_t*) ((uintptr_t) w + 24 * sizeof(int32_t));
+      do {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+        const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+        const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) k));
+        i0 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+        const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+        const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 24)));
+        i1 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+        const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+        const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 48)));
+        i2 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+        const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+        const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 72)));
+        i3 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+        const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+        const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 96)));
+        i4 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+        const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+        const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 120)));
+        i5 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+        const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+        const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 144)));
+        i6 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+        const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+        const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 168)));
+        i7 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+        const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+        const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 192)));
+        i8 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+        w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+        k += 8;
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, _mm256_load_ps(params->fp32_avx2.scale));
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+        __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+
+        if XNN_LIKELY(c >= 8) {
+          _mm_storel_epi64((__m128i*) output, vout0123456701234567);
+          output += 8;
+          c -= 8;
+        } else {
+          if (c & 4) {
+            *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
+            vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32);
+            output += 4;
+          }
+          if (c & 2) {
+            *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0);
+            vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16);
+            output += 2;
+          }
+          if (c & 1) {
+            *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0);
+            output += 1;
+          }
+          c = 0;
+        }
+      } while (c != 0);
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul16.c
index 4b34be0..4e6fe18 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul16.c
@@ -351,8 +351,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -393,7 +393,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -407,8 +407,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -422,7 +422,7 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
@@ -431,11 +431,11 @@
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       __m128i voutGHIJKLMNGHIJKLMN = _mm_packs_epi16(voutGHIJKLMN, voutGHIJKLMN);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       voutGHIJKLMNGHIJKLMN = _mm_max_epi8(voutGHIJKLMNGHIJKLMN, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNGHIJKLMN = _mm_min_epi8(voutGHIJKLMNGHIJKLMN, voutput_max);
 
@@ -570,8 +570,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -590,27 +590,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 8) {
           _mm_storel_epi64((__m128i*) output, vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul32.c
index f96e89d..027dbbe 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx-mul32.c
@@ -280,8 +280,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -322,7 +322,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -336,8 +336,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -351,13 +351,13 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -432,8 +432,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -445,21 +445,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c
index 9642bde..1fe0d8d 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-avx2-mul32.c
@@ -195,8 +195,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t));
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -220,7 +220,7 @@
       const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
       const __m256i vq31prodGHIJKLMN = _mm256_blend_epi16(vq31prodGIKM, vq31prodHJLN, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
@@ -228,8 +228,8 @@
       const __m256i vremGHIJKLMN =
         _mm256_add_epi32(_mm256_and_si256(vq31prodGHIJKLMN, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodGHIJKLMN));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
@@ -237,15 +237,15 @@
       vaccGHIJKLMN =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prodGHIJKLMN, vshift), _mm256_cmpgt_epi32(vremGHIJKLMN, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vaccGHIJKLMN), _mm256_extracti128_si256(vaccGHIJKLMN, 1)), _mm256_castsi256_si128(voutput_zero_point));
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
       __m128i voutGHIJKLMNGHIJKLMN = _mm_packs_epi16(voutGHIJKLMN, voutGHIJKLMN);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNGHIJKLMN = _mm_max_epi8(voutGHIJKLMNGHIJKLMN, voutput_min);
@@ -318,8 +318,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -331,21 +331,21 @@
 
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
         vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
         vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-neon-mul16.c
index fa108eb..97998c7 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-neon-mul16.c
@@ -29,12 +29,12 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
-  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+  const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
+  const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
-  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
-  const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-  const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
+  const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+  const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse2-mul16.c
index e0526bf..320dc68 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse2-mul16.c
@@ -351,8 +351,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -446,7 +446,7 @@
       const __m128i vq31prodGHIJ = _mm_shuffle_epi32(vq31prodGIHJ, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prodKLMN = _mm_shuffle_epi32(vq31prodKMLN, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -460,8 +460,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -475,17 +475,17 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
       vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min);
       voutGHIJKLMN = _mm_max_epi16(voutGHIJKLMN, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
       vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max);
       voutGHIJKLMN = _mm_min_epi16(voutGHIJKLMN, voutput_max);
@@ -625,8 +625,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -665,24 +665,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul16.c
index 3d3ff1f..ac66d22 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul16.c
@@ -351,8 +351,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -393,7 +393,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -407,8 +407,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -422,7 +422,7 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
@@ -431,11 +431,11 @@
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       __m128i voutGHIJKLMNGHIJKLMN = _mm_packs_epi16(voutGHIJKLMN, voutGHIJKLMN);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       voutGHIJKLMNGHIJKLMN = _mm_max_epi8(voutGHIJKLMNGHIJKLMN, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNGHIJKLMN = _mm_min_epi8(voutGHIJKLMNGHIJKLMN, voutput_max);
 
@@ -570,8 +570,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -590,27 +590,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 8) {
           _mm_storel_epi64((__m128i*) output, vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul32.c
index 446dfb8..d56b5ba 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-sse41-mul32.c
@@ -280,8 +280,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -322,7 +322,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -336,8 +336,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -351,13 +351,13 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -432,8 +432,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -445,21 +445,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-ssse3-mul16.c
index 4b723de..9a9d8ee 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-ssse3-mul16.c
@@ -351,8 +351,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -446,7 +446,7 @@
       const __m128i vq31prodGHIJ = _mm_shuffle_epi32(vq31prodGIHJ, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prodKLMN = _mm_shuffle_epi32(vq31prodKMLN, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -460,8 +460,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -475,17 +475,17 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
       vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min);
       voutGHIJKLMN = _mm_max_epi16(voutGHIJKLMN, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
       vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max);
       voutGHIJKLMN = _mm_min_epi16(voutGHIJKLMN, voutput_max);
@@ -625,8 +625,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -665,24 +665,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-wasmsimd-mul16.c
index 5b048d9..1f5fc62 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-wasmsimd-mul16.c
@@ -281,8 +281,8 @@
       const v128_t vaccKL = wasm_v32x4_shuffle(vaccKLMN, vsignKLMN, 0, 4, 1, 5);
       const v128_t vaccMN = wasm_v32x4_shuffle(vaccKLMN, vsignKLMN, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -303,7 +303,7 @@
       const v128_t vq31prodGHIJ = wasm_v32x4_shuffle(vprodGH, vprodIJ, 1, 3, 5, 7);
       const v128_t vq31prodKLMN = wasm_v32x4_shuffle(vprodKL, vprodMN, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
       const v128_t vrem89AB = wasm_i32x4_add(wasm_v128_and(vq31prod89AB, vremainder_mask), wasm_i32x4_shr(vq31prod89AB, 31));
@@ -311,8 +311,8 @@
       const v128_t vremGHIJ = wasm_i32x4_add(wasm_v128_and(vq31prodGHIJ, vremainder_mask), wasm_i32x4_shr(vq31prodGHIJ, 31));
       const v128_t vremKLMN = wasm_i32x4_add(wasm_v128_and(vq31prodKLMN, vremainder_mask), wasm_i32x4_shr(vq31prodKLMN, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
       vacc89AB = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod89AB, vshift), wasm_i32x4_gt(vrem89AB, vthreshold));
@@ -320,13 +320,13 @@
       vaccGHIJ = wasm_i32x4_sub(wasm_i32x4_shr(vq31prodGHIJ, vshift), wasm_i32x4_gt(vremGHIJ, vthreshold));
       vaccKLMN = wasm_i32x4_sub(wasm_i32x4_shr(vq31prodKLMN, vshift), wasm_i32x4_gt(vremKLMN, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
       v128_t vout89ABCDEF = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc89AB, vaccCDEF), voutput_zero_point);
       v128_t voutGHIJKLMN = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456789ABCDEF = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout89ABCDEF), voutput_min), voutput_max);
       v128_t voutGHIJKLMNGHIJKLMN = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(voutGHIJKLMN, voutGHIJKLMN), voutput_min), voutput_max);
 
@@ -433,8 +433,8 @@
       const v128_t vacc45 = wasm_v32x4_shuffle(vacc4567, vsign4567, 0, 4, 1, 5);
       const v128_t vacc67 = wasm_v32x4_shuffle(vacc4567, vsign4567, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -443,20 +443,20 @@
       const v128_t vq31prod0123 = wasm_v32x4_shuffle(vprod01, vprod23, 1, 3, 5, 7);
       const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);
 
       if XNN_LIKELY(c >= 8) {
diff --git a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-xop-mul32.c
index a3e631f..752626a 100644
--- a/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up24x9-minmax-gemmlowp-xop-mul32.c
@@ -285,8 +285,8 @@
 
       w = (const void*) ((uintptr_t) w + 24 * sizeof(int32_t) + 216 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -327,7 +327,7 @@
       const __m128i vq31prodGHIJ = _mm_blend_epi16(vq31prodGI, vq31prodHJ, 0xCC);
       const __m128i vq31prodKLMN = _mm_blend_epi16(vq31prodKM, vq31prodLN, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
@@ -341,8 +341,8 @@
       const __m128i vremKLMN =
         _mm_add_epi32(_mm_and_si128(vq31prodKLMN, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prodKLMN));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
@@ -356,13 +356,13 @@
       vaccKLMN =
         _mm_sub_epi32(_mm_sra_epi32(vq31prodKLMN, vshift), _mm_cmpgt_epi32(vremKLMN, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
       __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point);
       __m128i voutGHIJKLMN = _mm_adds_epi16(_mm_packs_epi32(vaccGHIJ, vaccKLMN), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
@@ -437,8 +437,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -450,21 +450,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up2x25-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up2x25-minmax-gemmlowp-scalar.c
index fb742e7..37fc8d7 100644
--- a/src/qs8-dwconv/gen/up2x25-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up2x25-minmax-gemmlowp-scalar.c
@@ -28,14 +28,14 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32_t vmultiplier = params->scalar.multiplier;
+  const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
   const int64_t vq31rounding = INT64_C(0x40000000);
-  const int32_t vremainder_mask = params->scalar.remainder_mask;
-  const uint32_t vshift = params->scalar.shift;
-  const int32_t vremainder_threshold = params->scalar.remainder_threshold;
-  const int32_t vout_min = params->scalar.output_min_less_zero_point;
-  const int32_t vout_max = params->scalar.output_max_less_zero_point;
-  const int32_t voutput_zero_point = params->scalar.output_zero_point;
+  const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
+  const uint32_t vshift = params->gemmlowp_scalar.shift;
+  const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
+  const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
+  const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
+  const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up2x9-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up2x9-minmax-gemmlowp-scalar.c
index ae2a2a3..e7ed15b 100644
--- a/src/qs8-dwconv/gen/up2x9-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up2x9-minmax-gemmlowp-scalar.c
@@ -28,14 +28,14 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32_t vmultiplier = params->scalar.multiplier;
+  const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
   const int64_t vq31rounding = INT64_C(0x40000000);
-  const int32_t vremainder_mask = params->scalar.remainder_mask;
-  const uint32_t vshift = params->scalar.shift;
-  const int32_t vremainder_threshold = params->scalar.remainder_threshold;
-  const int32_t vout_min = params->scalar.output_min_less_zero_point;
-  const int32_t vout_max = params->scalar.output_max_less_zero_point;
-  const int32_t voutput_zero_point = params->scalar.output_zero_point;
+  const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
+  const uint32_t vshift = params->gemmlowp_scalar.shift;
+  const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
+  const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
+  const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
+  const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul16.c b/src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul16.c
new file mode 100644
index 0000000..5fbbc91
--- /dev/null
+++ b/src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul16.c
@@ -0,0 +1,865 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul16.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    const int8_t* i9 = input[9];
+    assert(i9 != NULL);
+    if XNN_UNPREDICTABLE(i9 != zero) {
+      i9 = (const int8_t*) ((uintptr_t) i9 + input_offset);
+    }
+    const int8_t* i10 = input[10];
+    assert(i10 != NULL);
+    if XNN_UNPREDICTABLE(i10 != zero) {
+      i10 = (const int8_t*) ((uintptr_t) i10 + input_offset);
+    }
+    const int8_t* i11 = input[11];
+    assert(i11 != NULL);
+    if XNN_UNPREDICTABLE(i11 != zero) {
+      i11 = (const int8_t*) ((uintptr_t) i11 + input_offset);
+    }
+    const int8_t* i12 = input[12];
+    assert(i12 != NULL);
+    if XNN_UNPREDICTABLE(i12 != zero) {
+      i12 = (const int8_t*) ((uintptr_t) i12 + input_offset);
+    }
+    const int8_t* i13 = input[13];
+    assert(i13 != NULL);
+    if XNN_UNPREDICTABLE(i13 != zero) {
+      i13 = (const int8_t*) ((uintptr_t) i13 + input_offset);
+    }
+    const int8_t* i14 = input[14];
+    assert(i14 != NULL);
+    if XNN_UNPREDICTABLE(i14 != zero) {
+      i14 = (const int8_t*) ((uintptr_t) i14 + input_offset);
+    }
+    const int8_t* i15 = input[15];
+    assert(i15 != NULL);
+    if XNN_UNPREDICTABLE(i15 != zero) {
+      i15 = (const int8_t*) ((uintptr_t) i15 + input_offset);
+    }
+    const int8_t* i16 = input[16];
+    assert(i16 != NULL);
+    if XNN_UNPREDICTABLE(i16 != zero) {
+      i16 = (const int8_t*) ((uintptr_t) i16 + input_offset);
+    }
+    const int8_t* i17 = input[17];
+    assert(i17 != NULL);
+    if XNN_UNPREDICTABLE(i17 != zero) {
+      i17 = (const int8_t*) ((uintptr_t) i17 + input_offset);
+    }
+    const int8_t* i18 = input[18];
+    assert(i18 != NULL);
+    if XNN_UNPREDICTABLE(i18 != zero) {
+      i18 = (const int8_t*) ((uintptr_t) i18 + input_offset);
+    }
+    const int8_t* i19 = input[19];
+    assert(i19 != NULL);
+    if XNN_UNPREDICTABLE(i19 != zero) {
+      i19 = (const int8_t*) ((uintptr_t) i19 + input_offset);
+    }
+    const int8_t* i20 = input[20];
+    assert(i20 != NULL);
+    if XNN_UNPREDICTABLE(i20 != zero) {
+      i20 = (const int8_t*) ((uintptr_t) i20 + input_offset);
+    }
+    const int8_t* i21 = input[21];
+    assert(i21 != NULL);
+    if XNN_UNPREDICTABLE(i21 != zero) {
+      i21 = (const int8_t*) ((uintptr_t) i21 + input_offset);
+    }
+    const int8_t* i22 = input[22];
+    assert(i22 != NULL);
+    if XNN_UNPREDICTABLE(i22 != zero) {
+      i22 = (const int8_t*) ((uintptr_t) i22 + input_offset);
+    }
+    const int8_t* i23 = input[23];
+    assert(i23 != NULL);
+    if XNN_UNPREDICTABLE(i23 != zero) {
+      i23 = (const int8_t*) ((uintptr_t) i23 + input_offset);
+    }
+    const int8_t* i24 = input[24];
+    assert(i24 != NULL);
+    if XNN_UNPREDICTABLE(i24 != zero) {
+      i24 = (const int8_t*) ((uintptr_t) i24 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 32; c -= 32) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+      __m256i vaccGHIJKLMN = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 16 * sizeof(int32_t)));
+      __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t)));
+
+
+      const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0));
+      const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      const __m256i vi0xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i0 + 16)));
+      const __m256i vk0xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      i0 += 32;
+
+      const __m256i vprod0x0123456789ABCDEF =  _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF);
+      const __m128i vprod0x89ABCDEF = _mm256_extracti128_si256(vprod0x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod0x89ABCDEF));
+      const __m256i vprod0xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi0xGHIJKLMNOPQRSTUV, vk0xGHIJKLMNOPQRSTUV);
+      const __m128i vprod0xOPQRSTUV = _mm256_extracti128_si256(vprod0xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV));
+
+      const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1));
+      const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      const __m256i vi1xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i1 + 16)));
+      const __m256i vk1xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      i1 += 32;
+
+      const __m256i vprod1x0123456789ABCDEF =  _mm256_mullo_epi16(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF);
+      const __m128i vprod1x89ABCDEF = _mm256_extracti128_si256(vprod1x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod1x89ABCDEF));
+      const __m256i vprod1xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi1xGHIJKLMNOPQRSTUV, vk1xGHIJKLMNOPQRSTUV);
+      const __m128i vprod1xOPQRSTUV = _mm256_extracti128_si256(vprod1xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV));
+
+      const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2));
+      const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      const __m256i vi2xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i2 + 16)));
+      const __m256i vk2xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      i2 += 32;
+
+      const __m256i vprod2x0123456789ABCDEF =  _mm256_mullo_epi16(vi2x0123456789ABCDEF, vk2x0123456789ABCDEF);
+      const __m128i vprod2x89ABCDEF = _mm256_extracti128_si256(vprod2x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod2x89ABCDEF));
+      const __m256i vprod2xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi2xGHIJKLMNOPQRSTUV, vk2xGHIJKLMNOPQRSTUV);
+      const __m128i vprod2xOPQRSTUV = _mm256_extracti128_si256(vprod2xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV));
+
+      const __m256i vi3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i3));
+      const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      const __m256i vi3xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i3 + 16)));
+      const __m256i vk3xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      i3 += 32;
+
+      const __m256i vprod3x0123456789ABCDEF =  _mm256_mullo_epi16(vi3x0123456789ABCDEF, vk3x0123456789ABCDEF);
+      const __m128i vprod3x89ABCDEF = _mm256_extracti128_si256(vprod3x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod3x89ABCDEF));
+      const __m256i vprod3xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi3xGHIJKLMNOPQRSTUV, vk3xGHIJKLMNOPQRSTUV);
+      const __m128i vprod3xOPQRSTUV = _mm256_extracti128_si256(vprod3xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV));
+
+      const __m256i vi4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i4));
+      const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      const __m256i vi4xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i4 + 16)));
+      const __m256i vk4xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+      i4 += 32;
+
+      const __m256i vprod4x0123456789ABCDEF =  _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF);
+      const __m128i vprod4x89ABCDEF = _mm256_extracti128_si256(vprod4x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod4x89ABCDEF));
+      const __m256i vprod4xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi4xGHIJKLMNOPQRSTUV, vk4xGHIJKLMNOPQRSTUV);
+      const __m128i vprod4xOPQRSTUV = _mm256_extracti128_si256(vprod4xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV));
+
+      const __m256i vi5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i5));
+      const __m256i vk5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+      const __m256i vi5xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i5 + 16)));
+      const __m256i vk5xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+      i5 += 32;
+
+      const __m256i vprod5x0123456789ABCDEF =  _mm256_mullo_epi16(vi5x0123456789ABCDEF, vk5x0123456789ABCDEF);
+      const __m128i vprod5x89ABCDEF = _mm256_extracti128_si256(vprod5x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod5x89ABCDEF));
+      const __m256i vprod5xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi5xGHIJKLMNOPQRSTUV, vk5xGHIJKLMNOPQRSTUV);
+      const __m128i vprod5xOPQRSTUV = _mm256_extracti128_si256(vprod5xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV));
+
+      const __m256i vi6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i6));
+      const __m256i vk6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+      const __m256i vi6xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i6 + 16)));
+      const __m256i vk6xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 208 * sizeof(int8_t))));
+      i6 += 32;
+
+      const __m256i vprod6x0123456789ABCDEF =  _mm256_mullo_epi16(vi6x0123456789ABCDEF, vk6x0123456789ABCDEF);
+      const __m128i vprod6x89ABCDEF = _mm256_extracti128_si256(vprod6x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod6x89ABCDEF));
+      const __m256i vprod6xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi6xGHIJKLMNOPQRSTUV, vk6xGHIJKLMNOPQRSTUV);
+      const __m128i vprod6xOPQRSTUV = _mm256_extracti128_si256(vprod6xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV));
+
+      const __m256i vi7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i7));
+      const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 224 * sizeof(int8_t))));
+      const __m256i vi7xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i7 + 16)));
+      const __m256i vk7xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 240 * sizeof(int8_t))));
+      i7 += 32;
+
+      const __m256i vprod7x0123456789ABCDEF =  _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF);
+      const __m128i vprod7x89ABCDEF = _mm256_extracti128_si256(vprod7x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod7x89ABCDEF));
+      const __m256i vprod7xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi7xGHIJKLMNOPQRSTUV, vk7xGHIJKLMNOPQRSTUV);
+      const __m128i vprod7xOPQRSTUV = _mm256_extracti128_si256(vprod7xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV));
+
+      const __m256i vi8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i8));
+      const __m256i vk8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 256 * sizeof(int8_t))));
+      const __m256i vi8xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i8 + 16)));
+      const __m256i vk8xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 272 * sizeof(int8_t))));
+      i8 += 32;
+
+      const __m256i vprod8x0123456789ABCDEF =  _mm256_mullo_epi16(vi8x0123456789ABCDEF, vk8x0123456789ABCDEF);
+      const __m128i vprod8x89ABCDEF = _mm256_extracti128_si256(vprod8x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
+      const __m256i vprod8xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi8xGHIJKLMNOPQRSTUV, vk8xGHIJKLMNOPQRSTUV);
+      const __m128i vprod8xOPQRSTUV = _mm256_extracti128_si256(vprod8xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV));
+
+      const __m256i vi9x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i9));
+      const __m256i vk9x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 288 * sizeof(int8_t))));
+      const __m256i vi9xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i9 + 16)));
+      const __m256i vk9xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 304 * sizeof(int8_t))));
+      i9 += 32;
+
+      const __m256i vprod9x0123456789ABCDEF =  _mm256_mullo_epi16(vi9x0123456789ABCDEF, vk9x0123456789ABCDEF);
+      const __m128i vprod9x89ABCDEF = _mm256_extracti128_si256(vprod9x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod9x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod9x89ABCDEF));
+      const __m256i vprod9xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi9xGHIJKLMNOPQRSTUV, vk9xGHIJKLMNOPQRSTUV);
+      const __m128i vprod9xOPQRSTUV = _mm256_extracti128_si256(vprod9xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod9xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod9xOPQRSTUV));
+
+      const __m256i vi10x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i10));
+      const __m256i vk10x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 320 * sizeof(int8_t))));
+      const __m256i vi10xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i10 + 16)));
+      const __m256i vk10xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 336 * sizeof(int8_t))));
+      i10 += 32;
+
+      const __m256i vprod10x0123456789ABCDEF =  _mm256_mullo_epi16(vi10x0123456789ABCDEF, vk10x0123456789ABCDEF);
+      const __m128i vprod10x89ABCDEF = _mm256_extracti128_si256(vprod10x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod10x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod10x89ABCDEF));
+      const __m256i vprod10xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi10xGHIJKLMNOPQRSTUV, vk10xGHIJKLMNOPQRSTUV);
+      const __m128i vprod10xOPQRSTUV = _mm256_extracti128_si256(vprod10xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod10xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod10xOPQRSTUV));
+
+      const __m256i vi11x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i11));
+      const __m256i vk11x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 352 * sizeof(int8_t))));
+      const __m256i vi11xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i11 + 16)));
+      const __m256i vk11xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 368 * sizeof(int8_t))));
+      i11 += 32;
+
+      const __m256i vprod11x0123456789ABCDEF =  _mm256_mullo_epi16(vi11x0123456789ABCDEF, vk11x0123456789ABCDEF);
+      const __m128i vprod11x89ABCDEF = _mm256_extracti128_si256(vprod11x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod11x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod11x89ABCDEF));
+      const __m256i vprod11xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi11xGHIJKLMNOPQRSTUV, vk11xGHIJKLMNOPQRSTUV);
+      const __m128i vprod11xOPQRSTUV = _mm256_extracti128_si256(vprod11xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod11xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod11xOPQRSTUV));
+
+      const __m256i vi12x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i12));
+      const __m256i vk12x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 384 * sizeof(int8_t))));
+      const __m256i vi12xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i12 + 16)));
+      const __m256i vk12xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 400 * sizeof(int8_t))));
+      i12 += 32;
+
+      const __m256i vprod12x0123456789ABCDEF =  _mm256_mullo_epi16(vi12x0123456789ABCDEF, vk12x0123456789ABCDEF);
+      const __m128i vprod12x89ABCDEF = _mm256_extracti128_si256(vprod12x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod12x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod12x89ABCDEF));
+      const __m256i vprod12xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi12xGHIJKLMNOPQRSTUV, vk12xGHIJKLMNOPQRSTUV);
+      const __m128i vprod12xOPQRSTUV = _mm256_extracti128_si256(vprod12xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod12xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod12xOPQRSTUV));
+
+      const __m256i vi13x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i13));
+      const __m256i vk13x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 416 * sizeof(int8_t))));
+      const __m256i vi13xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i13 + 16)));
+      const __m256i vk13xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 432 * sizeof(int8_t))));
+      i13 += 32;
+
+      const __m256i vprod13x0123456789ABCDEF =  _mm256_mullo_epi16(vi13x0123456789ABCDEF, vk13x0123456789ABCDEF);
+      const __m128i vprod13x89ABCDEF = _mm256_extracti128_si256(vprod13x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod13x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod13x89ABCDEF));
+      const __m256i vprod13xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi13xGHIJKLMNOPQRSTUV, vk13xGHIJKLMNOPQRSTUV);
+      const __m128i vprod13xOPQRSTUV = _mm256_extracti128_si256(vprod13xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod13xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod13xOPQRSTUV));
+
+      const __m256i vi14x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i14));
+      const __m256i vk14x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 448 * sizeof(int8_t))));
+      const __m256i vi14xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i14 + 16)));
+      const __m256i vk14xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 464 * sizeof(int8_t))));
+      i14 += 32;
+
+      const __m256i vprod14x0123456789ABCDEF =  _mm256_mullo_epi16(vi14x0123456789ABCDEF, vk14x0123456789ABCDEF);
+      const __m128i vprod14x89ABCDEF = _mm256_extracti128_si256(vprod14x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod14x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod14x89ABCDEF));
+      const __m256i vprod14xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi14xGHIJKLMNOPQRSTUV, vk14xGHIJKLMNOPQRSTUV);
+      const __m128i vprod14xOPQRSTUV = _mm256_extracti128_si256(vprod14xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod14xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod14xOPQRSTUV));
+
+      const __m256i vi15x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i15));
+      const __m256i vk15x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 480 * sizeof(int8_t))));
+      const __m256i vi15xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i15 + 16)));
+      const __m256i vk15xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 496 * sizeof(int8_t))));
+      i15 += 32;
+
+      const __m256i vprod15x0123456789ABCDEF =  _mm256_mullo_epi16(vi15x0123456789ABCDEF, vk15x0123456789ABCDEF);
+      const __m128i vprod15x89ABCDEF = _mm256_extracti128_si256(vprod15x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod15x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod15x89ABCDEF));
+      const __m256i vprod15xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi15xGHIJKLMNOPQRSTUV, vk15xGHIJKLMNOPQRSTUV);
+      const __m128i vprod15xOPQRSTUV = _mm256_extracti128_si256(vprod15xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod15xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod15xOPQRSTUV));
+
+      const __m256i vi16x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i16));
+      const __m256i vk16x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 512 * sizeof(int8_t))));
+      const __m256i vi16xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i16 + 16)));
+      const __m256i vk16xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 528 * sizeof(int8_t))));
+      i16 += 32;
+
+      const __m256i vprod16x0123456789ABCDEF =  _mm256_mullo_epi16(vi16x0123456789ABCDEF, vk16x0123456789ABCDEF);
+      const __m128i vprod16x89ABCDEF = _mm256_extracti128_si256(vprod16x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod16x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod16x89ABCDEF));
+      const __m256i vprod16xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi16xGHIJKLMNOPQRSTUV, vk16xGHIJKLMNOPQRSTUV);
+      const __m128i vprod16xOPQRSTUV = _mm256_extracti128_si256(vprod16xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod16xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod16xOPQRSTUV));
+
+      const __m256i vi17x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i17));
+      const __m256i vk17x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 544 * sizeof(int8_t))));
+      const __m256i vi17xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i17 + 16)));
+      const __m256i vk17xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 560 * sizeof(int8_t))));
+      i17 += 32;
+
+      const __m256i vprod17x0123456789ABCDEF =  _mm256_mullo_epi16(vi17x0123456789ABCDEF, vk17x0123456789ABCDEF);
+      const __m128i vprod17x89ABCDEF = _mm256_extracti128_si256(vprod17x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod17x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod17x89ABCDEF));
+      const __m256i vprod17xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi17xGHIJKLMNOPQRSTUV, vk17xGHIJKLMNOPQRSTUV);
+      const __m128i vprod17xOPQRSTUV = _mm256_extracti128_si256(vprod17xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod17xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod17xOPQRSTUV));
+
+      const __m256i vi18x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i18));
+      const __m256i vk18x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 576 * sizeof(int8_t))));
+      const __m256i vi18xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i18 + 16)));
+      const __m256i vk18xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 592 * sizeof(int8_t))));
+      i18 += 32;
+
+      const __m256i vprod18x0123456789ABCDEF =  _mm256_mullo_epi16(vi18x0123456789ABCDEF, vk18x0123456789ABCDEF);
+      const __m128i vprod18x89ABCDEF = _mm256_extracti128_si256(vprod18x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod18x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod18x89ABCDEF));
+      const __m256i vprod18xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi18xGHIJKLMNOPQRSTUV, vk18xGHIJKLMNOPQRSTUV);
+      const __m128i vprod18xOPQRSTUV = _mm256_extracti128_si256(vprod18xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod18xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod18xOPQRSTUV));
+
+      const __m256i vi19x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i19));
+      const __m256i vk19x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 608 * sizeof(int8_t))));
+      const __m256i vi19xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i19 + 16)));
+      const __m256i vk19xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 624 * sizeof(int8_t))));
+      i19 += 32;
+
+      const __m256i vprod19x0123456789ABCDEF =  _mm256_mullo_epi16(vi19x0123456789ABCDEF, vk19x0123456789ABCDEF);
+      const __m128i vprod19x89ABCDEF = _mm256_extracti128_si256(vprod19x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod19x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod19x89ABCDEF));
+      const __m256i vprod19xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi19xGHIJKLMNOPQRSTUV, vk19xGHIJKLMNOPQRSTUV);
+      const __m128i vprod19xOPQRSTUV = _mm256_extracti128_si256(vprod19xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod19xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod19xOPQRSTUV));
+
+      const __m256i vi20x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i20));
+      const __m256i vk20x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 640 * sizeof(int8_t))));
+      const __m256i vi20xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i20 + 16)));
+      const __m256i vk20xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 656 * sizeof(int8_t))));
+      i20 += 32;
+
+      const __m256i vprod20x0123456789ABCDEF =  _mm256_mullo_epi16(vi20x0123456789ABCDEF, vk20x0123456789ABCDEF);
+      const __m128i vprod20x89ABCDEF = _mm256_extracti128_si256(vprod20x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod20x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod20x89ABCDEF));
+      const __m256i vprod20xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi20xGHIJKLMNOPQRSTUV, vk20xGHIJKLMNOPQRSTUV);
+      const __m128i vprod20xOPQRSTUV = _mm256_extracti128_si256(vprod20xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod20xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod20xOPQRSTUV));
+
+      const __m256i vi21x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i21));
+      const __m256i vk21x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 672 * sizeof(int8_t))));
+      const __m256i vi21xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i21 + 16)));
+      const __m256i vk21xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 688 * sizeof(int8_t))));
+      i21 += 32;
+
+      const __m256i vprod21x0123456789ABCDEF =  _mm256_mullo_epi16(vi21x0123456789ABCDEF, vk21x0123456789ABCDEF);
+      const __m128i vprod21x89ABCDEF = _mm256_extracti128_si256(vprod21x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod21x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod21x89ABCDEF));
+      const __m256i vprod21xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi21xGHIJKLMNOPQRSTUV, vk21xGHIJKLMNOPQRSTUV);
+      const __m128i vprod21xOPQRSTUV = _mm256_extracti128_si256(vprod21xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod21xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod21xOPQRSTUV));
+
+      const __m256i vi22x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i22));
+      const __m256i vk22x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 704 * sizeof(int8_t))));
+      const __m256i vi22xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i22 + 16)));
+      const __m256i vk22xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 720 * sizeof(int8_t))));
+      i22 += 32;
+
+      const __m256i vprod22x0123456789ABCDEF =  _mm256_mullo_epi16(vi22x0123456789ABCDEF, vk22x0123456789ABCDEF);
+      const __m128i vprod22x89ABCDEF = _mm256_extracti128_si256(vprod22x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod22x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod22x89ABCDEF));
+      const __m256i vprod22xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi22xGHIJKLMNOPQRSTUV, vk22xGHIJKLMNOPQRSTUV);
+      const __m128i vprod22xOPQRSTUV = _mm256_extracti128_si256(vprod22xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod22xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod22xOPQRSTUV));
+
+      const __m256i vi23x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i23));
+      const __m256i vk23x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 736 * sizeof(int8_t))));
+      const __m256i vi23xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i23 + 16)));
+      const __m256i vk23xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 752 * sizeof(int8_t))));
+      i23 += 32;
+
+      const __m256i vprod23x0123456789ABCDEF =  _mm256_mullo_epi16(vi23x0123456789ABCDEF, vk23x0123456789ABCDEF);
+      const __m128i vprod23x89ABCDEF = _mm256_extracti128_si256(vprod23x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod23x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod23x89ABCDEF));
+      const __m256i vprod23xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi23xGHIJKLMNOPQRSTUV, vk23xGHIJKLMNOPQRSTUV);
+      const __m128i vprod23xOPQRSTUV = _mm256_extracti128_si256(vprod23xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod23xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod23xOPQRSTUV));
+
+      const __m256i vi24x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i24));
+      const __m256i vk24x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 768 * sizeof(int8_t))));
+      const __m256i vi24xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i24 + 16)));
+      const __m256i vk24xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 784 * sizeof(int8_t))));
+      i24 += 32;
+
+      const __m256i vprod24x0123456789ABCDEF =  _mm256_mullo_epi16(vi24x0123456789ABCDEF, vk24x0123456789ABCDEF);
+      const __m128i vprod24x89ABCDEF = _mm256_extracti128_si256(vprod24x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod24x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod24x89ABCDEF));
+      const __m256i vprod24xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi24xGHIJKLMNOPQRSTUV, vk24xGHIJKLMNOPQRSTUV);
+      const __m128i vprod24xOPQRSTUV = _mm256_extracti128_si256(vprod24xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod24xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod24xOPQRSTUV));
+
+      w = (const void*) ((uintptr_t) w + 32 * sizeof(int32_t) + 800 * sizeof(int8_t));
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+      __m256 vscaledGHIJKLMN = _mm256_cvtepi32_ps(vaccGHIJKLMN);
+      __m256 vscaledOPQRSTUV = _mm256_cvtepi32_ps(vaccOPQRSTUV);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+      vscaledGHIJKLMN = _mm256_mul_ps(vscaledGHIJKLMN, vscale);
+      vscaledOPQRSTUV = _mm256_mul_ps(vscaledOPQRSTUV, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+      vaccGHIJKLMN = _mm256_cvtps_epi32(vscaledGHIJKLMN);
+      vaccOPQRSTUV = _mm256_cvtps_epi32(vscaledOPQRSTUV);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      const __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+      const __m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), voutput_zero_point);
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+      __m128i voutGHIJKLMNOPQRSTUV = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(voutGHIJOPQRKLMNSTUV), _mm256_extracti128_si256(voutGHIJOPQRKLMNSTUV, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+      voutGHIJKLMNOPQRSTUV = _mm_max_epi8(voutGHIJKLMNOPQRSTUV, voutput_min);
+      voutGHIJKLMNOPQRSTUV = _mm_min_epi8(voutGHIJKLMNOPQRSTUV, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      _mm_storeu_si128((__m128i*) (output + 16), voutGHIJKLMNOPQRSTUV);
+      output += 32;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      const int8_t* k = (const int8_t*) ((uintptr_t) w + 32 * sizeof(int32_t));
+      do {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+        __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+
+
+        const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0));
+        const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) k));
+        i0 += 16;
+
+        const __m256i vprod0x0123456789ABCDEF = _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF);
+        const __m128i vprod0x89ABCDEF = _mm256_extracti128_si256(vprod0x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod0x89ABCDEF));
+
+        const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1));
+        const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 32)));
+        i1 += 16;
+
+        const __m256i vprod1x0123456789ABCDEF = _mm256_mullo_epi16(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF);
+        const __m128i vprod1x89ABCDEF = _mm256_extracti128_si256(vprod1x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod1x89ABCDEF));
+
+        const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2));
+        const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 64)));
+        i2 += 16;
+
+        const __m256i vprod2x0123456789ABCDEF = _mm256_mullo_epi16(vi2x0123456789ABCDEF, vk2x0123456789ABCDEF);
+        const __m128i vprod2x89ABCDEF = _mm256_extracti128_si256(vprod2x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod2x89ABCDEF));
+
+        const __m256i vi3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i3));
+        const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 96)));
+        i3 += 16;
+
+        const __m256i vprod3x0123456789ABCDEF = _mm256_mullo_epi16(vi3x0123456789ABCDEF, vk3x0123456789ABCDEF);
+        const __m128i vprod3x89ABCDEF = _mm256_extracti128_si256(vprod3x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod3x89ABCDEF));
+
+        const __m256i vi4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i4));
+        const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 128)));
+        i4 += 16;
+
+        const __m256i vprod4x0123456789ABCDEF = _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF);
+        const __m128i vprod4x89ABCDEF = _mm256_extracti128_si256(vprod4x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod4x89ABCDEF));
+
+        const __m256i vi5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i5));
+        const __m256i vk5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 160)));
+        i5 += 16;
+
+        const __m256i vprod5x0123456789ABCDEF = _mm256_mullo_epi16(vi5x0123456789ABCDEF, vk5x0123456789ABCDEF);
+        const __m128i vprod5x89ABCDEF = _mm256_extracti128_si256(vprod5x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod5x89ABCDEF));
+
+        const __m256i vi6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i6));
+        const __m256i vk6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 192)));
+        i6 += 16;
+
+        const __m256i vprod6x0123456789ABCDEF = _mm256_mullo_epi16(vi6x0123456789ABCDEF, vk6x0123456789ABCDEF);
+        const __m128i vprod6x89ABCDEF = _mm256_extracti128_si256(vprod6x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod6x89ABCDEF));
+
+        const __m256i vi7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i7));
+        const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 224)));
+        i7 += 16;
+
+        const __m256i vprod7x0123456789ABCDEF = _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF);
+        const __m128i vprod7x89ABCDEF = _mm256_extracti128_si256(vprod7x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod7x89ABCDEF));
+
+        const __m256i vi8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i8));
+        const __m256i vk8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 256)));
+        i8 += 16;
+
+        const __m256i vprod8x0123456789ABCDEF = _mm256_mullo_epi16(vi8x0123456789ABCDEF, vk8x0123456789ABCDEF);
+        const __m128i vprod8x89ABCDEF = _mm256_extracti128_si256(vprod8x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
+
+        const __m256i vi9x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i9));
+        const __m256i vk9x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 288)));
+        i9 += 16;
+
+        const __m256i vprod9x0123456789ABCDEF = _mm256_mullo_epi16(vi9x0123456789ABCDEF, vk9x0123456789ABCDEF);
+        const __m128i vprod9x89ABCDEF = _mm256_extracti128_si256(vprod9x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod9x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod9x89ABCDEF));
+
+        const __m256i vi10x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i10));
+        const __m256i vk10x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 320)));
+        i10 += 16;
+
+        const __m256i vprod10x0123456789ABCDEF = _mm256_mullo_epi16(vi10x0123456789ABCDEF, vk10x0123456789ABCDEF);
+        const __m128i vprod10x89ABCDEF = _mm256_extracti128_si256(vprod10x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod10x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod10x89ABCDEF));
+
+        const __m256i vi11x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i11));
+        const __m256i vk11x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 352)));
+        i11 += 16;
+
+        const __m256i vprod11x0123456789ABCDEF = _mm256_mullo_epi16(vi11x0123456789ABCDEF, vk11x0123456789ABCDEF);
+        const __m128i vprod11x89ABCDEF = _mm256_extracti128_si256(vprod11x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod11x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod11x89ABCDEF));
+
+        const __m256i vi12x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i12));
+        const __m256i vk12x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 384)));
+        i12 += 16;
+
+        const __m256i vprod12x0123456789ABCDEF = _mm256_mullo_epi16(vi12x0123456789ABCDEF, vk12x0123456789ABCDEF);
+        const __m128i vprod12x89ABCDEF = _mm256_extracti128_si256(vprod12x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod12x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod12x89ABCDEF));
+
+        const __m256i vi13x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i13));
+        const __m256i vk13x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 416)));
+        i13 += 16;
+
+        const __m256i vprod13x0123456789ABCDEF = _mm256_mullo_epi16(vi13x0123456789ABCDEF, vk13x0123456789ABCDEF);
+        const __m128i vprod13x89ABCDEF = _mm256_extracti128_si256(vprod13x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod13x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod13x89ABCDEF));
+
+        const __m256i vi14x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i14));
+        const __m256i vk14x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 448)));
+        i14 += 16;
+
+        const __m256i vprod14x0123456789ABCDEF = _mm256_mullo_epi16(vi14x0123456789ABCDEF, vk14x0123456789ABCDEF);
+        const __m128i vprod14x89ABCDEF = _mm256_extracti128_si256(vprod14x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod14x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod14x89ABCDEF));
+
+        const __m256i vi15x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i15));
+        const __m256i vk15x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 480)));
+        i15 += 16;
+
+        const __m256i vprod15x0123456789ABCDEF = _mm256_mullo_epi16(vi15x0123456789ABCDEF, vk15x0123456789ABCDEF);
+        const __m128i vprod15x89ABCDEF = _mm256_extracti128_si256(vprod15x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod15x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod15x89ABCDEF));
+
+        const __m256i vi16x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i16));
+        const __m256i vk16x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 512)));
+        i16 += 16;
+
+        const __m256i vprod16x0123456789ABCDEF = _mm256_mullo_epi16(vi16x0123456789ABCDEF, vk16x0123456789ABCDEF);
+        const __m128i vprod16x89ABCDEF = _mm256_extracti128_si256(vprod16x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod16x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod16x89ABCDEF));
+
+        const __m256i vi17x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i17));
+        const __m256i vk17x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 544)));
+        i17 += 16;
+
+        const __m256i vprod17x0123456789ABCDEF = _mm256_mullo_epi16(vi17x0123456789ABCDEF, vk17x0123456789ABCDEF);
+        const __m128i vprod17x89ABCDEF = _mm256_extracti128_si256(vprod17x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod17x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod17x89ABCDEF));
+
+        const __m256i vi18x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i18));
+        const __m256i vk18x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 576)));
+        i18 += 16;
+
+        const __m256i vprod18x0123456789ABCDEF = _mm256_mullo_epi16(vi18x0123456789ABCDEF, vk18x0123456789ABCDEF);
+        const __m128i vprod18x89ABCDEF = _mm256_extracti128_si256(vprod18x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod18x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod18x89ABCDEF));
+
+        const __m256i vi19x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i19));
+        const __m256i vk19x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 608)));
+        i19 += 16;
+
+        const __m256i vprod19x0123456789ABCDEF = _mm256_mullo_epi16(vi19x0123456789ABCDEF, vk19x0123456789ABCDEF);
+        const __m128i vprod19x89ABCDEF = _mm256_extracti128_si256(vprod19x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod19x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod19x89ABCDEF));
+
+        const __m256i vi20x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i20));
+        const __m256i vk20x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 640)));
+        i20 += 16;
+
+        const __m256i vprod20x0123456789ABCDEF = _mm256_mullo_epi16(vi20x0123456789ABCDEF, vk20x0123456789ABCDEF);
+        const __m128i vprod20x89ABCDEF = _mm256_extracti128_si256(vprod20x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod20x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod20x89ABCDEF));
+
+        const __m256i vi21x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i21));
+        const __m256i vk21x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 672)));
+        i21 += 16;
+
+        const __m256i vprod21x0123456789ABCDEF = _mm256_mullo_epi16(vi21x0123456789ABCDEF, vk21x0123456789ABCDEF);
+        const __m128i vprod21x89ABCDEF = _mm256_extracti128_si256(vprod21x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod21x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod21x89ABCDEF));
+
+        const __m256i vi22x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i22));
+        const __m256i vk22x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 704)));
+        i22 += 16;
+
+        const __m256i vprod22x0123456789ABCDEF = _mm256_mullo_epi16(vi22x0123456789ABCDEF, vk22x0123456789ABCDEF);
+        const __m128i vprod22x89ABCDEF = _mm256_extracti128_si256(vprod22x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod22x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod22x89ABCDEF));
+
+        const __m256i vi23x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i23));
+        const __m256i vk23x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 736)));
+        i23 += 16;
+
+        const __m256i vprod23x0123456789ABCDEF = _mm256_mullo_epi16(vi23x0123456789ABCDEF, vk23x0123456789ABCDEF);
+        const __m128i vprod23x89ABCDEF = _mm256_extracti128_si256(vprod23x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod23x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod23x89ABCDEF));
+
+        const __m256i vi24x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i24));
+        const __m256i vk24x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 768)));
+        i24 += 16;
+
+        const __m256i vprod24x0123456789ABCDEF = _mm256_mullo_epi16(vi24x0123456789ABCDEF, vk24x0123456789ABCDEF);
+        const __m128i vprod24x89ABCDEF = _mm256_extracti128_si256(vprod24x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod24x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod24x89ABCDEF));
+
+        w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t));
+        k += 16;
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+
+        const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+        vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+        vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+        __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc89ABCDEF), _mm256_extracti128_si256(vacc89ABCDEF, 1)), voutput_zero_point);
+
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+
+        __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
+        vout0123456789ABCDEF = _mm_min_epi8(_mm_max_epi8(vout0123456789ABCDEF, voutput_min), voutput_max);
+
+        if XNN_LIKELY(c >= 16) {
+          _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+          output += 16;
+          c -= 16;
+        } else {
+          if (c & 8) {
+            _mm_storel_epi64((__m128i*) output, vout0123456789ABCDEF);
+            vout0123456789ABCDEF = _mm_unpackhi_epi64(vout0123456789ABCDEF, vout0123456789ABCDEF);
+            output += 8;
+          }
+          if (c & 4) {
+            *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456789ABCDEF);
+            vout0123456789ABCDEF = _mm_srli_epi64(vout0123456789ABCDEF, 32);
+            output += 4;
+          }
+          if (c & 2) {
+            *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456789ABCDEF, 0);
+            vout0123456789ABCDEF = _mm_srli_epi32(vout0123456789ABCDEF, 16);
+            output += 2;
+          }
+          if (c & 1) {
+            *output = (int8_t) _mm_extract_epi8(vout0123456789ABCDEF, 0);
+            output += 1;
+          }
+          c = 0;
+        }
+      } while (c != 0);
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul32.c b/src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul32.c
new file mode 100644
index 0000000..f8017a2
--- /dev/null
+++ b/src/qs8-dwconv/gen/up32x25-minmax-fp32-avx2-mul32.c
@@ -0,0 +1,778 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul32.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    const int8_t* i9 = input[9];
+    assert(i9 != NULL);
+    if XNN_UNPREDICTABLE(i9 != zero) {
+      i9 = (const int8_t*) ((uintptr_t) i9 + input_offset);
+    }
+    const int8_t* i10 = input[10];
+    assert(i10 != NULL);
+    if XNN_UNPREDICTABLE(i10 != zero) {
+      i10 = (const int8_t*) ((uintptr_t) i10 + input_offset);
+    }
+    const int8_t* i11 = input[11];
+    assert(i11 != NULL);
+    if XNN_UNPREDICTABLE(i11 != zero) {
+      i11 = (const int8_t*) ((uintptr_t) i11 + input_offset);
+    }
+    const int8_t* i12 = input[12];
+    assert(i12 != NULL);
+    if XNN_UNPREDICTABLE(i12 != zero) {
+      i12 = (const int8_t*) ((uintptr_t) i12 + input_offset);
+    }
+    const int8_t* i13 = input[13];
+    assert(i13 != NULL);
+    if XNN_UNPREDICTABLE(i13 != zero) {
+      i13 = (const int8_t*) ((uintptr_t) i13 + input_offset);
+    }
+    const int8_t* i14 = input[14];
+    assert(i14 != NULL);
+    if XNN_UNPREDICTABLE(i14 != zero) {
+      i14 = (const int8_t*) ((uintptr_t) i14 + input_offset);
+    }
+    const int8_t* i15 = input[15];
+    assert(i15 != NULL);
+    if XNN_UNPREDICTABLE(i15 != zero) {
+      i15 = (const int8_t*) ((uintptr_t) i15 + input_offset);
+    }
+    const int8_t* i16 = input[16];
+    assert(i16 != NULL);
+    if XNN_UNPREDICTABLE(i16 != zero) {
+      i16 = (const int8_t*) ((uintptr_t) i16 + input_offset);
+    }
+    const int8_t* i17 = input[17];
+    assert(i17 != NULL);
+    if XNN_UNPREDICTABLE(i17 != zero) {
+      i17 = (const int8_t*) ((uintptr_t) i17 + input_offset);
+    }
+    const int8_t* i18 = input[18];
+    assert(i18 != NULL);
+    if XNN_UNPREDICTABLE(i18 != zero) {
+      i18 = (const int8_t*) ((uintptr_t) i18 + input_offset);
+    }
+    const int8_t* i19 = input[19];
+    assert(i19 != NULL);
+    if XNN_UNPREDICTABLE(i19 != zero) {
+      i19 = (const int8_t*) ((uintptr_t) i19 + input_offset);
+    }
+    const int8_t* i20 = input[20];
+    assert(i20 != NULL);
+    if XNN_UNPREDICTABLE(i20 != zero) {
+      i20 = (const int8_t*) ((uintptr_t) i20 + input_offset);
+    }
+    const int8_t* i21 = input[21];
+    assert(i21 != NULL);
+    if XNN_UNPREDICTABLE(i21 != zero) {
+      i21 = (const int8_t*) ((uintptr_t) i21 + input_offset);
+    }
+    const int8_t* i22 = input[22];
+    assert(i22 != NULL);
+    if XNN_UNPREDICTABLE(i22 != zero) {
+      i22 = (const int8_t*) ((uintptr_t) i22 + input_offset);
+    }
+    const int8_t* i23 = input[23];
+    assert(i23 != NULL);
+    if XNN_UNPREDICTABLE(i23 != zero) {
+      i23 = (const int8_t*) ((uintptr_t) i23 + input_offset);
+    }
+    const int8_t* i24 = input[24];
+    assert(i24 != NULL);
+    if XNN_UNPREDICTABLE(i24 != zero) {
+      i24 = (const int8_t*) ((uintptr_t) i24 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 32; c -= 32) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+      __m256i vaccGHIJKLMN = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 16 * sizeof(int32_t)));
+      __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t)));
+
+
+      const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+      const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      const __m256i vi0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 8)));
+      const __m256i vk0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+      const __m256i vi0xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 16)));
+      const __m256i vk0xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      const __m256i vi0xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 24)));
+      const __m256i vk0xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+      i0 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi0x89ABCDEF, vk0x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi0xGHIJKLMN, vk0xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV));
+
+      const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+      const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      const __m256i vi1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 8)));
+      const __m256i vk1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+      const __m256i vi1xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 16)));
+      const __m256i vk1xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      const __m256i vi1xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 24)));
+      const __m256i vk1xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+      i1 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi1x89ABCDEF, vk1x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi1xGHIJKLMN, vk1xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV));
+
+      const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+      const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      const __m256i vi2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 8)));
+      const __m256i vk2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 72 * sizeof(int8_t))));
+      const __m256i vi2xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 16)));
+      const __m256i vk2xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      const __m256i vi2xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 24)));
+      const __m256i vk2xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 88 * sizeof(int8_t))));
+      i2 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi2x89ABCDEF, vk2x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi2xGHIJKLMN, vk2xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV));
+
+      const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+      const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      const __m256i vi3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 8)));
+      const __m256i vk3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 104 * sizeof(int8_t))));
+      const __m256i vi3xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 16)));
+      const __m256i vk3xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      const __m256i vi3xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 24)));
+      const __m256i vk3xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 120 * sizeof(int8_t))));
+      i3 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi3x89ABCDEF, vk3x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi3xGHIJKLMN, vk3xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV));
+
+      const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+      const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      const __m256i vi4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 8)));
+      const __m256i vk4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 136 * sizeof(int8_t))));
+      const __m256i vi4xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 16)));
+      const __m256i vk4xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+      const __m256i vi4xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 24)));
+      const __m256i vk4xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 152 * sizeof(int8_t))));
+      i4 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi4x89ABCDEF, vk4x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi4xGHIJKLMN, vk4xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV));
+
+      const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+      const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+      const __m256i vi5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 8)));
+      const __m256i vk5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 168 * sizeof(int8_t))));
+      const __m256i vi5xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 16)));
+      const __m256i vk5xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+      const __m256i vi5xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 24)));
+      const __m256i vk5xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 184 * sizeof(int8_t))));
+      i5 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi5x89ABCDEF, vk5x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi5xGHIJKLMN, vk5xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV));
+
+      const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+      const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+      const __m256i vi6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 8)));
+      const __m256i vk6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 200 * sizeof(int8_t))));
+      const __m256i vi6xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 16)));
+      const __m256i vk6xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 208 * sizeof(int8_t))));
+      const __m256i vi6xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 24)));
+      const __m256i vk6xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 216 * sizeof(int8_t))));
+      i6 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi6x89ABCDEF, vk6x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi6xGHIJKLMN, vk6xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV));
+
+      const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+      const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 224 * sizeof(int8_t))));
+      const __m256i vi7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 8)));
+      const __m256i vk7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 232 * sizeof(int8_t))));
+      const __m256i vi7xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 16)));
+      const __m256i vk7xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 240 * sizeof(int8_t))));
+      const __m256i vi7xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 24)));
+      const __m256i vk7xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 248 * sizeof(int8_t))));
+      i7 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi7x89ABCDEF, vk7x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi7xGHIJKLMN, vk7xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV));
+
+      const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+      const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 256 * sizeof(int8_t))));
+      const __m256i vi8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 8)));
+      const __m256i vk8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 264 * sizeof(int8_t))));
+      const __m256i vi8xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 16)));
+      const __m256i vk8xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 272 * sizeof(int8_t))));
+      const __m256i vi8xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 24)));
+      const __m256i vk8xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 280 * sizeof(int8_t))));
+      i8 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi8x89ABCDEF, vk8x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi8xGHIJKLMN, vk8xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV));
+
+      const __m256i vi9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i9));
+      const __m256i vk9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 288 * sizeof(int8_t))));
+      const __m256i vi9x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i9 + 8)));
+      const __m256i vk9x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 296 * sizeof(int8_t))));
+      const __m256i vi9xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i9 + 16)));
+      const __m256i vk9xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 304 * sizeof(int8_t))));
+      const __m256i vi9xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i9 + 24)));
+      const __m256i vk9xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 312 * sizeof(int8_t))));
+      i9 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi9x01234567, vk9x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi9x89ABCDEF, vk9x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi9xGHIJKLMN, vk9xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi9xOPQRSTUV, vk9xOPQRSTUV));
+
+      const __m256i vi10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i10));
+      const __m256i vk10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 320 * sizeof(int8_t))));
+      const __m256i vi10x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i10 + 8)));
+      const __m256i vk10x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 328 * sizeof(int8_t))));
+      const __m256i vi10xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i10 + 16)));
+      const __m256i vk10xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 336 * sizeof(int8_t))));
+      const __m256i vi10xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i10 + 24)));
+      const __m256i vk10xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 344 * sizeof(int8_t))));
+      i10 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi10x01234567, vk10x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi10x89ABCDEF, vk10x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi10xGHIJKLMN, vk10xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi10xOPQRSTUV, vk10xOPQRSTUV));
+
+      const __m256i vi11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i11));
+      const __m256i vk11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 352 * sizeof(int8_t))));
+      const __m256i vi11x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i11 + 8)));
+      const __m256i vk11x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 360 * sizeof(int8_t))));
+      const __m256i vi11xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i11 + 16)));
+      const __m256i vk11xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 368 * sizeof(int8_t))));
+      const __m256i vi11xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i11 + 24)));
+      const __m256i vk11xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 376 * sizeof(int8_t))));
+      i11 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi11x01234567, vk11x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi11x89ABCDEF, vk11x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi11xGHIJKLMN, vk11xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi11xOPQRSTUV, vk11xOPQRSTUV));
+
+      const __m256i vi12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i12));
+      const __m256i vk12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 384 * sizeof(int8_t))));
+      const __m256i vi12x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i12 + 8)));
+      const __m256i vk12x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 392 * sizeof(int8_t))));
+      const __m256i vi12xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i12 + 16)));
+      const __m256i vk12xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 400 * sizeof(int8_t))));
+      const __m256i vi12xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i12 + 24)));
+      const __m256i vk12xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 408 * sizeof(int8_t))));
+      i12 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi12x01234567, vk12x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi12x89ABCDEF, vk12x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi12xGHIJKLMN, vk12xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi12xOPQRSTUV, vk12xOPQRSTUV));
+
+      const __m256i vi13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i13));
+      const __m256i vk13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 416 * sizeof(int8_t))));
+      const __m256i vi13x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i13 + 8)));
+      const __m256i vk13x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 424 * sizeof(int8_t))));
+      const __m256i vi13xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i13 + 16)));
+      const __m256i vk13xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 432 * sizeof(int8_t))));
+      const __m256i vi13xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i13 + 24)));
+      const __m256i vk13xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 440 * sizeof(int8_t))));
+      i13 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi13x01234567, vk13x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi13x89ABCDEF, vk13x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi13xGHIJKLMN, vk13xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi13xOPQRSTUV, vk13xOPQRSTUV));
+
+      const __m256i vi14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i14));
+      const __m256i vk14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 448 * sizeof(int8_t))));
+      const __m256i vi14x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i14 + 8)));
+      const __m256i vk14x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 456 * sizeof(int8_t))));
+      const __m256i vi14xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i14 + 16)));
+      const __m256i vk14xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 464 * sizeof(int8_t))));
+      const __m256i vi14xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i14 + 24)));
+      const __m256i vk14xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 472 * sizeof(int8_t))));
+      i14 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi14x01234567, vk14x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi14x89ABCDEF, vk14x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi14xGHIJKLMN, vk14xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi14xOPQRSTUV, vk14xOPQRSTUV));
+
+      const __m256i vi15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i15));
+      const __m256i vk15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 480 * sizeof(int8_t))));
+      const __m256i vi15x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i15 + 8)));
+      const __m256i vk15x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 488 * sizeof(int8_t))));
+      const __m256i vi15xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i15 + 16)));
+      const __m256i vk15xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 496 * sizeof(int8_t))));
+      const __m256i vi15xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i15 + 24)));
+      const __m256i vk15xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 504 * sizeof(int8_t))));
+      i15 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi15x01234567, vk15x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi15x89ABCDEF, vk15x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi15xGHIJKLMN, vk15xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi15xOPQRSTUV, vk15xOPQRSTUV));
+
+      const __m256i vi16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i16));
+      const __m256i vk16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 512 * sizeof(int8_t))));
+      const __m256i vi16x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i16 + 8)));
+      const __m256i vk16x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 520 * sizeof(int8_t))));
+      const __m256i vi16xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i16 + 16)));
+      const __m256i vk16xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 528 * sizeof(int8_t))));
+      const __m256i vi16xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i16 + 24)));
+      const __m256i vk16xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 536 * sizeof(int8_t))));
+      i16 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi16x01234567, vk16x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi16x89ABCDEF, vk16x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi16xGHIJKLMN, vk16xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi16xOPQRSTUV, vk16xOPQRSTUV));
+
+      const __m256i vi17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i17));
+      const __m256i vk17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 544 * sizeof(int8_t))));
+      const __m256i vi17x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i17 + 8)));
+      const __m256i vk17x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 552 * sizeof(int8_t))));
+      const __m256i vi17xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i17 + 16)));
+      const __m256i vk17xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 560 * sizeof(int8_t))));
+      const __m256i vi17xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i17 + 24)));
+      const __m256i vk17xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 568 * sizeof(int8_t))));
+      i17 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi17x01234567, vk17x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi17x89ABCDEF, vk17x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi17xGHIJKLMN, vk17xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi17xOPQRSTUV, vk17xOPQRSTUV));
+
+      const __m256i vi18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i18));
+      const __m256i vk18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 576 * sizeof(int8_t))));
+      const __m256i vi18x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i18 + 8)));
+      const __m256i vk18x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 584 * sizeof(int8_t))));
+      const __m256i vi18xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i18 + 16)));
+      const __m256i vk18xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 592 * sizeof(int8_t))));
+      const __m256i vi18xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i18 + 24)));
+      const __m256i vk18xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 600 * sizeof(int8_t))));
+      i18 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi18x01234567, vk18x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi18x89ABCDEF, vk18x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi18xGHIJKLMN, vk18xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi18xOPQRSTUV, vk18xOPQRSTUV));
+
+      const __m256i vi19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i19));
+      const __m256i vk19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 608 * sizeof(int8_t))));
+      const __m256i vi19x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i19 + 8)));
+      const __m256i vk19x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 616 * sizeof(int8_t))));
+      const __m256i vi19xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i19 + 16)));
+      const __m256i vk19xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 624 * sizeof(int8_t))));
+      const __m256i vi19xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i19 + 24)));
+      const __m256i vk19xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 632 * sizeof(int8_t))));
+      i19 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi19x01234567, vk19x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi19x89ABCDEF, vk19x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi19xGHIJKLMN, vk19xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi19xOPQRSTUV, vk19xOPQRSTUV));
+
+      const __m256i vi20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i20));
+      const __m256i vk20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 640 * sizeof(int8_t))));
+      const __m256i vi20x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i20 + 8)));
+      const __m256i vk20x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 648 * sizeof(int8_t))));
+      const __m256i vi20xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i20 + 16)));
+      const __m256i vk20xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 656 * sizeof(int8_t))));
+      const __m256i vi20xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i20 + 24)));
+      const __m256i vk20xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 664 * sizeof(int8_t))));
+      i20 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi20x01234567, vk20x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi20x89ABCDEF, vk20x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi20xGHIJKLMN, vk20xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi20xOPQRSTUV, vk20xOPQRSTUV));
+
+      const __m256i vi21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i21));
+      const __m256i vk21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 672 * sizeof(int8_t))));
+      const __m256i vi21x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i21 + 8)));
+      const __m256i vk21x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 680 * sizeof(int8_t))));
+      const __m256i vi21xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i21 + 16)));
+      const __m256i vk21xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 688 * sizeof(int8_t))));
+      const __m256i vi21xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i21 + 24)));
+      const __m256i vk21xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 696 * sizeof(int8_t))));
+      i21 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi21x01234567, vk21x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi21x89ABCDEF, vk21x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi21xGHIJKLMN, vk21xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi21xOPQRSTUV, vk21xOPQRSTUV));
+
+      const __m256i vi22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i22));
+      const __m256i vk22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 704 * sizeof(int8_t))));
+      const __m256i vi22x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i22 + 8)));
+      const __m256i vk22x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 712 * sizeof(int8_t))));
+      const __m256i vi22xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i22 + 16)));
+      const __m256i vk22xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 720 * sizeof(int8_t))));
+      const __m256i vi22xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i22 + 24)));
+      const __m256i vk22xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 728 * sizeof(int8_t))));
+      i22 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi22x01234567, vk22x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi22x89ABCDEF, vk22x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi22xGHIJKLMN, vk22xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi22xOPQRSTUV, vk22xOPQRSTUV));
+
+      const __m256i vi23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i23));
+      const __m256i vk23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 736 * sizeof(int8_t))));
+      const __m256i vi23x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i23 + 8)));
+      const __m256i vk23x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 744 * sizeof(int8_t))));
+      const __m256i vi23xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i23 + 16)));
+      const __m256i vk23xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 752 * sizeof(int8_t))));
+      const __m256i vi23xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i23 + 24)));
+      const __m256i vk23xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 760 * sizeof(int8_t))));
+      i23 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi23x01234567, vk23x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi23x89ABCDEF, vk23x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi23xGHIJKLMN, vk23xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi23xOPQRSTUV, vk23xOPQRSTUV));
+
+      const __m256i vi24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i24));
+      const __m256i vk24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 768 * sizeof(int8_t))));
+      const __m256i vi24x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i24 + 8)));
+      const __m256i vk24x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 776 * sizeof(int8_t))));
+      const __m256i vi24xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i24 + 16)));
+      const __m256i vk24xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 784 * sizeof(int8_t))));
+      const __m256i vi24xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i24 + 24)));
+      const __m256i vk24xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 792 * sizeof(int8_t))));
+      i24 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi24x01234567, vk24x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi24x89ABCDEF, vk24x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi24xGHIJKLMN, vk24xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi24xOPQRSTUV, vk24xOPQRSTUV));
+
+      w = (const void*) ((uintptr_t) w + 32 * sizeof(int32_t) + 800 * sizeof(int8_t));
+
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+      __m256 vscaledGHIJKLMN = _mm256_cvtepi32_ps(vaccGHIJKLMN);
+      __m256 vscaledOPQRSTUV = _mm256_cvtepi32_ps(vaccOPQRSTUV);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+      vscaledGHIJKLMN = _mm256_mul_ps(vscaledGHIJKLMN, vscale);
+      vscaledOPQRSTUV = _mm256_mul_ps(vscaledOPQRSTUV, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+      vaccGHIJKLMN = _mm256_cvtps_epi32(vscaledGHIJKLMN);
+      vaccOPQRSTUV = _mm256_cvtps_epi32(vscaledOPQRSTUV);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+      __m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), voutput_zero_point);
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+      __m128i voutGHIJKLMNOPQRSTUV = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(voutGHIJOPQRKLMNSTUV), _mm256_extracti128_si256(voutGHIJOPQRKLMNSTUV, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+      voutGHIJKLMNOPQRSTUV = _mm_max_epi8(voutGHIJKLMNOPQRSTUV, voutput_min);
+      voutGHIJKLMNOPQRSTUV = _mm_min_epi8(voutGHIJKLMNOPQRSTUV, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      _mm_storeu_si128((__m128i*) (output + 16), voutGHIJKLMNOPQRSTUV);
+      output += 32;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      const int8_t* k = (const int8_t*) ((uintptr_t) w + 32 * sizeof(int32_t));
+      do {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+        const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+        const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) k));
+        i0 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+        const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+        const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 32)));
+        i1 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+        const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+        const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 64)));
+        i2 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+        const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+        const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 96)));
+        i3 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+        const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+        const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 128)));
+        i4 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+        const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+        const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 160)));
+        i5 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+        const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+        const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 192)));
+        i6 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+        const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+        const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 224)));
+        i7 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+        const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+        const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 256)));
+        i8 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+        const __m256i vi9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i9));
+        const __m256i vk9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 288)));
+        i9 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi9x01234567, vk9x01234567));
+
+        const __m256i vi10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i10));
+        const __m256i vk10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 320)));
+        i10 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi10x01234567, vk10x01234567));
+
+        const __m256i vi11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i11));
+        const __m256i vk11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 352)));
+        i11 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi11x01234567, vk11x01234567));
+
+        const __m256i vi12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i12));
+        const __m256i vk12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 384)));
+        i12 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi12x01234567, vk12x01234567));
+
+        const __m256i vi13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i13));
+        const __m256i vk13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 416)));
+        i13 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi13x01234567, vk13x01234567));
+
+        const __m256i vi14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i14));
+        const __m256i vk14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 448)));
+        i14 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi14x01234567, vk14x01234567));
+
+        const __m256i vi15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i15));
+        const __m256i vk15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 480)));
+        i15 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi15x01234567, vk15x01234567));
+
+        const __m256i vi16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i16));
+        const __m256i vk16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 512)));
+        i16 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi16x01234567, vk16x01234567));
+
+        const __m256i vi17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i17));
+        const __m256i vk17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 544)));
+        i17 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi17x01234567, vk17x01234567));
+
+        const __m256i vi18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i18));
+        const __m256i vk18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 576)));
+        i18 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi18x01234567, vk18x01234567));
+
+        const __m256i vi19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i19));
+        const __m256i vk19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 608)));
+        i19 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi19x01234567, vk19x01234567));
+
+        const __m256i vi20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i20));
+        const __m256i vk20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 640)));
+        i20 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi20x01234567, vk20x01234567));
+
+        const __m256i vi21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i21));
+        const __m256i vk21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 672)));
+        i21 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi21x01234567, vk21x01234567));
+
+        const __m256i vi22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i22));
+        const __m256i vk22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 704)));
+        i22 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi22x01234567, vk22x01234567));
+
+        const __m256i vi23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i23));
+        const __m256i vk23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 736)));
+        i23 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi23x01234567, vk23x01234567));
+
+        const __m256i vi24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i24));
+        const __m256i vk24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 768)));
+        i24 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi24x01234567, vk24x01234567));
+
+        w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+        k += 8;
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, _mm256_load_ps(params->fp32_avx2.scale));
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+        __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+
+        if XNN_LIKELY(c >= 8) {
+          _mm_storel_epi64((__m128i*) output, vout0123456701234567);
+          output += 8;
+          c -= 8;
+        } else {
+          if (c & 4) {
+            *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
+            vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32);
+            output += 4;
+          }
+          if (c & 2) {
+            *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0);
+            vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16);
+            output += 2;
+          }
+          if (c & 1) {
+            *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0);
+            output += 1;
+          }
+          c = 0;
+        }
+      } while (c != 0);
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c
index e984902..7965487 100644
--- a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c
+++ b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul16.c
@@ -542,9 +542,8 @@
       vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod24xOPQRSTUV));
 
       w = (const void*) ((uintptr_t) w + 32 * sizeof(int32_t) + 800 * sizeof(int8_t));
-
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -574,7 +573,7 @@
       const __m256i vq31prodGHIJKLMN = _mm256_blend_epi16(vq31prodGIKM, vq31prodHJLN, 0xCC);
       const __m256i vq31prodOPQRSTUV = _mm256_blend_epi16(vq31prodOQSU, vq31prodPRTV, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
@@ -584,8 +583,8 @@
       const __m256i vremOPQRSTUV =
         _mm256_add_epi32(_mm256_and_si256(vq31prodOPQRSTUV, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodOPQRSTUV));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
@@ -595,15 +594,15 @@
       vaccOPQRSTUV =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prodOPQRSTUV, vshift), _mm256_cmpgt_epi32(vremOPQRSTUV, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       const __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
       const __m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), voutput_zero_point);
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
       __m128i voutGHIJKLMNOPQRSTUV = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(voutGHIJOPQRKLMNSTUV), _mm256_extracti128_si256(voutGHIJOPQRKLMNSTUV, 1)), _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNOPQRSTUV = _mm_max_epi8(voutGHIJKLMNOPQRSTUV, voutput_min);
@@ -848,8 +847,8 @@
         w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t));
         k += 16;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
         const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -867,25 +866,25 @@
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
         const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
         const __m256i vrem89ABCDEF =
           _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
         vacc89ABCDEF =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89ABCDEF, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
         __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc89ABCDEF), _mm256_extracti128_si256(vacc89ABCDEF, 1)), voutput_zero_point);
 
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
 
         __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
         vout0123456789ABCDEF = _mm_min_epi8(_mm_max_epi8(vout0123456789ABCDEF, voutput_min), voutput_max);
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c
index 109cb1c..3d51fd6 100644
--- a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx2-mul32.c
@@ -543,8 +543,8 @@
 
       w = (const void*) ((uintptr_t) w + 32 * sizeof(int32_t) + 800 * sizeof(int8_t));
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -574,7 +574,7 @@
       const __m256i vq31prodGHIJKLMN = _mm256_blend_epi16(vq31prodGIKM, vq31prodHJLN, 0xCC);
       const __m256i vq31prodOPQRSTUV = _mm256_blend_epi16(vq31prodOQSU, vq31prodPRTV, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
@@ -584,8 +584,8 @@
       const __m256i vremOPQRSTUV =
         _mm256_add_epi32(_mm256_and_si256(vq31prodOPQRSTUV, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodOPQRSTUV));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
@@ -595,15 +595,15 @@
       vaccOPQRSTUV =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prodOPQRSTUV, vshift), _mm256_cmpgt_epi32(vremOPQRSTUV, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
       __m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), voutput_zero_point);
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
       __m128i voutGHIJKLMNOPQRSTUV = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(voutGHIJOPQRKLMNSTUV), _mm256_extracti128_si256(voutGHIJOPQRKLMNSTUV, 1)), _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNOPQRSTUV = _mm_max_epi8(voutGHIJKLMNOPQRSTUV, voutput_min);
@@ -772,8 +772,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -785,21 +785,21 @@
 
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
         vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
         vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx512skx-mul32.c b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx512skx-mul32.c
index 4d0bcad..3dd25d0 100644
--- a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx512skx-mul32.c
+++ b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-avx512skx-mul32.c
@@ -31,14 +31,14 @@
   assert(output_width != 0);
 
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
-  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.output_zero_point));
-  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.output_min));
-  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
+  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point));
+  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
   const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
 
   do {
diff --git a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-neon-mul16.c
index d623520..2ae31b7 100644
--- a/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up32x25-minmax-gemmlowp-neon-mul16.c
@@ -29,12 +29,12 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
-  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+  const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
+  const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
-  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
-  const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-  const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
+  const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+  const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul16.c b/src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul16.c
new file mode 100644
index 0000000..258a80b
--- /dev/null
+++ b/src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul16.c
@@ -0,0 +1,401 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul16.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 32; c -= 32) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+      __m256i vaccGHIJKLMN = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 16 * sizeof(int32_t)));
+      __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t)));
+
+
+      const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0));
+      const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      const __m256i vi0xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i0 + 16)));
+      const __m256i vk0xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      i0 += 32;
+
+      const __m256i vprod0x0123456789ABCDEF =  _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF);
+      const __m128i vprod0x89ABCDEF = _mm256_extracti128_si256(vprod0x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod0x89ABCDEF));
+      const __m256i vprod0xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi0xGHIJKLMNOPQRSTUV, vk0xGHIJKLMNOPQRSTUV);
+      const __m128i vprod0xOPQRSTUV = _mm256_extracti128_si256(vprod0xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV));
+
+      const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1));
+      const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      const __m256i vi1xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i1 + 16)));
+      const __m256i vk1xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      i1 += 32;
+
+      const __m256i vprod1x0123456789ABCDEF =  _mm256_mullo_epi16(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF);
+      const __m128i vprod1x89ABCDEF = _mm256_extracti128_si256(vprod1x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod1x89ABCDEF));
+      const __m256i vprod1xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi1xGHIJKLMNOPQRSTUV, vk1xGHIJKLMNOPQRSTUV);
+      const __m128i vprod1xOPQRSTUV = _mm256_extracti128_si256(vprod1xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV));
+
+      const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2));
+      const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      const __m256i vi2xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i2 + 16)));
+      const __m256i vk2xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      i2 += 32;
+
+      const __m256i vprod2x0123456789ABCDEF =  _mm256_mullo_epi16(vi2x0123456789ABCDEF, vk2x0123456789ABCDEF);
+      const __m128i vprod2x89ABCDEF = _mm256_extracti128_si256(vprod2x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod2x89ABCDEF));
+      const __m256i vprod2xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi2xGHIJKLMNOPQRSTUV, vk2xGHIJKLMNOPQRSTUV);
+      const __m128i vprod2xOPQRSTUV = _mm256_extracti128_si256(vprod2xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV));
+
+      const __m256i vi3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i3));
+      const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      const __m256i vi3xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i3 + 16)));
+      const __m256i vk3xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      i3 += 32;
+
+      const __m256i vprod3x0123456789ABCDEF =  _mm256_mullo_epi16(vi3x0123456789ABCDEF, vk3x0123456789ABCDEF);
+      const __m128i vprod3x89ABCDEF = _mm256_extracti128_si256(vprod3x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod3x89ABCDEF));
+      const __m256i vprod3xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi3xGHIJKLMNOPQRSTUV, vk3xGHIJKLMNOPQRSTUV);
+      const __m128i vprod3xOPQRSTUV = _mm256_extracti128_si256(vprod3xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV));
+
+      const __m256i vi4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i4));
+      const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      const __m256i vi4xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i4 + 16)));
+      const __m256i vk4xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+      i4 += 32;
+
+      const __m256i vprod4x0123456789ABCDEF =  _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF);
+      const __m128i vprod4x89ABCDEF = _mm256_extracti128_si256(vprod4x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod4x89ABCDEF));
+      const __m256i vprod4xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi4xGHIJKLMNOPQRSTUV, vk4xGHIJKLMNOPQRSTUV);
+      const __m128i vprod4xOPQRSTUV = _mm256_extracti128_si256(vprod4xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV));
+
+      const __m256i vi5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i5));
+      const __m256i vk5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+      const __m256i vi5xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i5 + 16)));
+      const __m256i vk5xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+      i5 += 32;
+
+      const __m256i vprod5x0123456789ABCDEF =  _mm256_mullo_epi16(vi5x0123456789ABCDEF, vk5x0123456789ABCDEF);
+      const __m128i vprod5x89ABCDEF = _mm256_extracti128_si256(vprod5x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod5x89ABCDEF));
+      const __m256i vprod5xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi5xGHIJKLMNOPQRSTUV, vk5xGHIJKLMNOPQRSTUV);
+      const __m128i vprod5xOPQRSTUV = _mm256_extracti128_si256(vprod5xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV));
+
+      const __m256i vi6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i6));
+      const __m256i vk6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+      const __m256i vi6xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i6 + 16)));
+      const __m256i vk6xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 208 * sizeof(int8_t))));
+      i6 += 32;
+
+      const __m256i vprod6x0123456789ABCDEF =  _mm256_mullo_epi16(vi6x0123456789ABCDEF, vk6x0123456789ABCDEF);
+      const __m128i vprod6x89ABCDEF = _mm256_extracti128_si256(vprod6x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod6x89ABCDEF));
+      const __m256i vprod6xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi6xGHIJKLMNOPQRSTUV, vk6xGHIJKLMNOPQRSTUV);
+      const __m128i vprod6xOPQRSTUV = _mm256_extracti128_si256(vprod6xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV));
+
+      const __m256i vi7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i7));
+      const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 224 * sizeof(int8_t))));
+      const __m256i vi7xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i7 + 16)));
+      const __m256i vk7xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 240 * sizeof(int8_t))));
+      i7 += 32;
+
+      const __m256i vprod7x0123456789ABCDEF =  _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF);
+      const __m128i vprod7x89ABCDEF = _mm256_extracti128_si256(vprod7x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod7x89ABCDEF));
+      const __m256i vprod7xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi7xGHIJKLMNOPQRSTUV, vk7xGHIJKLMNOPQRSTUV);
+      const __m128i vprod7xOPQRSTUV = _mm256_extracti128_si256(vprod7xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV));
+
+      const __m256i vi8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i8));
+      const __m256i vk8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 256 * sizeof(int8_t))));
+      const __m256i vi8xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (i8 + 16)));
+      const __m256i vk8xGHIJKLMNOPQRSTUV = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 272 * sizeof(int8_t))));
+      i8 += 32;
+
+      const __m256i vprod8x0123456789ABCDEF =  _mm256_mullo_epi16(vi8x0123456789ABCDEF, vk8x0123456789ABCDEF);
+      const __m128i vprod8x89ABCDEF = _mm256_extracti128_si256(vprod8x0123456789ABCDEF, 1);
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8x0123456789ABCDEF)));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
+      const __m256i vprod8xGHIJKLMNOPQRSTUV =  _mm256_mullo_epi16(vi8xGHIJKLMNOPQRSTUV, vk8xGHIJKLMNOPQRSTUV);
+      const __m128i vprod8xOPQRSTUV = _mm256_extracti128_si256(vprod8xGHIJKLMNOPQRSTUV, 1);
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8xGHIJKLMNOPQRSTUV)));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV));
+
+      w = (const void*) ((uintptr_t) w + 32 * sizeof(int32_t) + 288 * sizeof(int8_t));
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+      __m256 vscaledGHIJKLMN = _mm256_cvtepi32_ps(vaccGHIJKLMN);
+      __m256 vscaledOPQRSTUV = _mm256_cvtepi32_ps(vaccOPQRSTUV);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+      vscaledGHIJKLMN = _mm256_mul_ps(vscaledGHIJKLMN, vscale);
+      vscaledOPQRSTUV = _mm256_mul_ps(vscaledOPQRSTUV, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+      vaccGHIJKLMN = _mm256_cvtps_epi32(vscaledGHIJKLMN);
+      vaccOPQRSTUV = _mm256_cvtps_epi32(vscaledOPQRSTUV);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      const __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+      const __m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), voutput_zero_point);
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+      __m128i voutGHIJKLMNOPQRSTUV = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(voutGHIJOPQRKLMNSTUV), _mm256_extracti128_si256(voutGHIJOPQRKLMNSTUV, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+      voutGHIJKLMNOPQRSTUV = _mm_max_epi8(voutGHIJKLMNOPQRSTUV, voutput_min);
+      voutGHIJKLMNOPQRSTUV = _mm_min_epi8(voutGHIJKLMNOPQRSTUV, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      _mm_storeu_si128((__m128i*) (output + 16), voutGHIJKLMNOPQRSTUV);
+      output += 32;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      const int8_t* k = (const int8_t*) ((uintptr_t) w + 32 * sizeof(int32_t));
+      do {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+        __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+
+
+        const __m256i vi0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i0));
+        const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) k));
+        i0 += 16;
+
+        const __m256i vprod0x0123456789ABCDEF = _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF);
+        const __m128i vprod0x89ABCDEF = _mm256_extracti128_si256(vprod0x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod0x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod0x89ABCDEF));
+
+        const __m256i vi1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i1));
+        const __m256i vk1x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 32)));
+        i1 += 16;
+
+        const __m256i vprod1x0123456789ABCDEF = _mm256_mullo_epi16(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF);
+        const __m128i vprod1x89ABCDEF = _mm256_extracti128_si256(vprod1x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod1x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod1x89ABCDEF));
+
+        const __m256i vi2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i2));
+        const __m256i vk2x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 64)));
+        i2 += 16;
+
+        const __m256i vprod2x0123456789ABCDEF = _mm256_mullo_epi16(vi2x0123456789ABCDEF, vk2x0123456789ABCDEF);
+        const __m128i vprod2x89ABCDEF = _mm256_extracti128_si256(vprod2x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod2x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod2x89ABCDEF));
+
+        const __m256i vi3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i3));
+        const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 96)));
+        i3 += 16;
+
+        const __m256i vprod3x0123456789ABCDEF = _mm256_mullo_epi16(vi3x0123456789ABCDEF, vk3x0123456789ABCDEF);
+        const __m128i vprod3x89ABCDEF = _mm256_extracti128_si256(vprod3x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod3x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod3x89ABCDEF));
+
+        const __m256i vi4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i4));
+        const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 128)));
+        i4 += 16;
+
+        const __m256i vprod4x0123456789ABCDEF = _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF);
+        const __m128i vprod4x89ABCDEF = _mm256_extracti128_si256(vprod4x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod4x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod4x89ABCDEF));
+
+        const __m256i vi5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i5));
+        const __m256i vk5x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 160)));
+        i5 += 16;
+
+        const __m256i vprod5x0123456789ABCDEF = _mm256_mullo_epi16(vi5x0123456789ABCDEF, vk5x0123456789ABCDEF);
+        const __m128i vprod5x89ABCDEF = _mm256_extracti128_si256(vprod5x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod5x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod5x89ABCDEF));
+
+        const __m256i vi6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i6));
+        const __m256i vk6x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 192)));
+        i6 += 16;
+
+        const __m256i vprod6x0123456789ABCDEF = _mm256_mullo_epi16(vi6x0123456789ABCDEF, vk6x0123456789ABCDEF);
+        const __m128i vprod6x89ABCDEF = _mm256_extracti128_si256(vprod6x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod6x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod6x89ABCDEF));
+
+        const __m256i vi7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i7));
+        const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 224)));
+        i7 += 16;
+
+        const __m256i vprod7x0123456789ABCDEF = _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF);
+        const __m128i vprod7x89ABCDEF = _mm256_extracti128_si256(vprod7x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod7x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod7x89ABCDEF));
+
+        const __m256i vi8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) i8));
+        const __m256i vk8x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 256)));
+        i8 += 16;
+
+        const __m256i vprod8x0123456789ABCDEF = _mm256_mullo_epi16(vi8x0123456789ABCDEF, vk8x0123456789ABCDEF);
+        const __m128i vprod8x89ABCDEF = _mm256_extracti128_si256(vprod8x0123456789ABCDEF, 1);
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(vprod8x0123456789ABCDEF)));
+        vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_cvtepi16_epi32(vprod8x89ABCDEF));
+
+        w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t));
+        k += 16;
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+
+        const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+        vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+        vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+        __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc89ABCDEF), _mm256_extracti128_si256(vacc89ABCDEF, 1)), voutput_zero_point);
+
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+
+        __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
+        vout0123456789ABCDEF = _mm_min_epi8(_mm_max_epi8(vout0123456789ABCDEF, voutput_min), voutput_max);
+
+        if XNN_LIKELY(c >= 16) {
+          _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+          output += 16;
+          c -= 16;
+        } else {
+          if (c & 8) {
+            _mm_storel_epi64((__m128i*) output, vout0123456789ABCDEF);
+            vout0123456789ABCDEF = _mm_unpackhi_epi64(vout0123456789ABCDEF, vout0123456789ABCDEF);
+            output += 8;
+          }
+          if (c & 4) {
+            *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456789ABCDEF);
+            vout0123456789ABCDEF = _mm_srli_epi64(vout0123456789ABCDEF, 32);
+            output += 4;
+          }
+          if (c & 2) {
+            *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456789ABCDEF, 0);
+            vout0123456789ABCDEF = _mm_srli_epi32(vout0123456789ABCDEF, 16);
+            output += 2;
+          }
+          if (c & 1) {
+            *output = (int8_t) _mm_extract_epi8(vout0123456789ABCDEF, 0);
+            output += 1;
+          }
+          c = 0;
+        }
+      } while (c != 0);
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul32.c b/src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul32.c
new file mode 100644
index 0000000..5bde3a6
--- /dev/null
+++ b/src/qs8-dwconv/gen/up32x9-minmax-fp32-avx2-mul32.c
@@ -0,0 +1,362 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul32.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 32; c -= 32) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+      __m256i vacc89ABCDEF = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 8 * sizeof(int32_t)));
+      __m256i vaccGHIJKLMN = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 16 * sizeof(int32_t)));
+      __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t)));
+
+
+      const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+      const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      const __m256i vi0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 8)));
+      const __m256i vk0x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+      const __m256i vi0xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 16)));
+      const __m256i vk0xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      const __m256i vi0xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i0 + 24)));
+      const __m256i vk0xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+      i0 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi0x89ABCDEF, vk0x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi0xGHIJKLMN, vk0xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV));
+
+      const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+      const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      const __m256i vi1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 8)));
+      const __m256i vk1x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+      const __m256i vi1xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 16)));
+      const __m256i vk1xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      const __m256i vi1xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i1 + 24)));
+      const __m256i vk1xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+      i1 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi1x89ABCDEF, vk1x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi1xGHIJKLMN, vk1xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV));
+
+      const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+      const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      const __m256i vi2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 8)));
+      const __m256i vk2x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 72 * sizeof(int8_t))));
+      const __m256i vi2xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 16)));
+      const __m256i vk2xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      const __m256i vi2xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i2 + 24)));
+      const __m256i vk2xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 88 * sizeof(int8_t))));
+      i2 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi2x89ABCDEF, vk2x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi2xGHIJKLMN, vk2xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV));
+
+      const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+      const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      const __m256i vi3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 8)));
+      const __m256i vk3x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 104 * sizeof(int8_t))));
+      const __m256i vi3xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 16)));
+      const __m256i vk3xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      const __m256i vi3xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i3 + 24)));
+      const __m256i vk3xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 120 * sizeof(int8_t))));
+      i3 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi3x89ABCDEF, vk3x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi3xGHIJKLMN, vk3xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV));
+
+      const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+      const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      const __m256i vi4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 8)));
+      const __m256i vk4x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 136 * sizeof(int8_t))));
+      const __m256i vi4xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 16)));
+      const __m256i vk4xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+      const __m256i vi4xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i4 + 24)));
+      const __m256i vk4xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 152 * sizeof(int8_t))));
+      i4 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi4x89ABCDEF, vk4x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi4xGHIJKLMN, vk4xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV));
+
+      const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+      const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+      const __m256i vi5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 8)));
+      const __m256i vk5x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 168 * sizeof(int8_t))));
+      const __m256i vi5xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 16)));
+      const __m256i vk5xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+      const __m256i vi5xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i5 + 24)));
+      const __m256i vk5xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 184 * sizeof(int8_t))));
+      i5 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi5x89ABCDEF, vk5x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi5xGHIJKLMN, vk5xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV));
+
+      const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+      const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+      const __m256i vi6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 8)));
+      const __m256i vk6x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 200 * sizeof(int8_t))));
+      const __m256i vi6xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 16)));
+      const __m256i vk6xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 208 * sizeof(int8_t))));
+      const __m256i vi6xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i6 + 24)));
+      const __m256i vk6xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 216 * sizeof(int8_t))));
+      i6 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi6x89ABCDEF, vk6x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi6xGHIJKLMN, vk6xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV));
+
+      const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+      const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 224 * sizeof(int8_t))));
+      const __m256i vi7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 8)));
+      const __m256i vk7x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 232 * sizeof(int8_t))));
+      const __m256i vi7xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 16)));
+      const __m256i vk7xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 240 * sizeof(int8_t))));
+      const __m256i vi7xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i7 + 24)));
+      const __m256i vk7xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 248 * sizeof(int8_t))));
+      i7 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi7x89ABCDEF, vk7x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi7xGHIJKLMN, vk7xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV));
+
+      const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+      const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 256 * sizeof(int8_t))));
+      const __m256i vi8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 8)));
+      const __m256i vk8x89ABCDEF = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 264 * sizeof(int8_t))));
+      const __m256i vi8xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 16)));
+      const __m256i vk8xGHIJKLMN = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 272 * sizeof(int8_t))));
+      const __m256i vi8xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (i8 + 24)));
+      const __m256i vk8xOPQRSTUV = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 32 * sizeof(int32_t) + 280 * sizeof(int8_t))));
+      i8 += 32;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+      vacc89ABCDEF = _mm256_add_epi32(vacc89ABCDEF, _mm256_mullo_epi32(vi8x89ABCDEF, vk8x89ABCDEF));
+      vaccGHIJKLMN = _mm256_add_epi32(vaccGHIJKLMN, _mm256_mullo_epi32(vi8xGHIJKLMN, vk8xGHIJKLMN));
+      vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV));
+
+      w = (const void*) ((uintptr_t) w + 32 * sizeof(int32_t) + 288 * sizeof(int8_t));
+
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+      __m256 vscaled89ABCDEF = _mm256_cvtepi32_ps(vacc89ABCDEF);
+      __m256 vscaledGHIJKLMN = _mm256_cvtepi32_ps(vaccGHIJKLMN);
+      __m256 vscaledOPQRSTUV = _mm256_cvtepi32_ps(vaccOPQRSTUV);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+      vscaled89ABCDEF = _mm256_mul_ps(vscaled89ABCDEF, vscale);
+      vscaledGHIJKLMN = _mm256_mul_ps(vscaledGHIJKLMN, vscale);
+      vscaledOPQRSTUV = _mm256_mul_ps(vscaledOPQRSTUV, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+      vacc89ABCDEF = _mm256_cvtps_epi32(vscaled89ABCDEF);
+      vaccGHIJKLMN = _mm256_cvtps_epi32(vscaledGHIJKLMN);
+      vaccOPQRSTUV = _mm256_cvtps_epi32(vscaledOPQRSTUV);
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+      __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
+      __m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), voutput_zero_point);
+
+      __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+      __m128i voutGHIJKLMNOPQRSTUV = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(voutGHIJOPQRKLMNSTUV), _mm256_extracti128_si256(voutGHIJOPQRKLMNSTUV, 1)), _MM_SHUFFLE(3, 1, 2, 0));
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
+      vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
+      voutGHIJKLMNOPQRSTUV = _mm_max_epi8(voutGHIJKLMNOPQRSTUV, voutput_min);
+      voutGHIJKLMNOPQRSTUV = _mm_min_epi8(voutGHIJKLMNOPQRSTUV, voutput_max);
+
+      _mm_storeu_si128((__m128i*) output, vout0123456789ABCDEF);
+      _mm_storeu_si128((__m128i*) (output + 16), voutGHIJKLMNOPQRSTUV);
+      output += 32;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      const int8_t* k = (const int8_t*) ((uintptr_t) w + 32 * sizeof(int32_t));
+      do {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+        const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+        const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) k));
+        i0 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+        const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+        const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 32)));
+        i1 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+        const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+        const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 64)));
+        i2 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+        const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+        const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 96)));
+        i3 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+        const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+        const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 128)));
+        i4 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+        const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+        const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 160)));
+        i5 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+        const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+        const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 192)));
+        i6 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+        const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+        const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 224)));
+        i7 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+        const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+        const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) (k + 256)));
+        i8 += 8;
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+        w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+        k += 8;
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, _mm256_load_ps(params->fp32_avx2.scale));
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+        __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+
+        if XNN_LIKELY(c >= 8) {
+          _mm_storel_epi64((__m128i*) output, vout0123456701234567);
+          output += 8;
+          c -= 8;
+        } else {
+          if (c & 4) {
+            *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
+            vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32);
+            output += 4;
+          }
+          if (c & 2) {
+            *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0);
+            vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16);
+            output += 2;
+          }
+          if (c & 1) {
+            *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0);
+            output += 1;
+          }
+          c = 0;
+        }
+      } while (c != 0);
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c
index 6dfc4f9..853b119 100644
--- a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c
+++ b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul16.c
@@ -222,9 +222,8 @@
       vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV));
 
       w = (const void*) ((uintptr_t) w + 32 * sizeof(int32_t) + 288 * sizeof(int8_t));
-
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -254,7 +253,7 @@
       const __m256i vq31prodGHIJKLMN = _mm256_blend_epi16(vq31prodGIKM, vq31prodHJLN, 0xCC);
       const __m256i vq31prodOPQRSTUV = _mm256_blend_epi16(vq31prodOQSU, vq31prodPRTV, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
@@ -264,8 +263,8 @@
       const __m256i vremOPQRSTUV =
         _mm256_add_epi32(_mm256_and_si256(vq31prodOPQRSTUV, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodOPQRSTUV));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
@@ -275,15 +274,15 @@
       vaccOPQRSTUV =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prodOPQRSTUV, vshift), _mm256_cmpgt_epi32(vremOPQRSTUV, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       const __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
       const __m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), voutput_zero_point);
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
       __m128i voutGHIJKLMNOPQRSTUV = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(voutGHIJOPQRKLMNSTUV), _mm256_extracti128_si256(voutGHIJOPQRKLMNSTUV, 1)), _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNOPQRSTUV = _mm_max_epi8(voutGHIJKLMNOPQRSTUV, voutput_min);
@@ -384,8 +383,8 @@
         w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t));
         k += 16;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
         const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -403,25 +402,25 @@
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
         const __m256i vq31prod89ABCDEF = _mm256_blend_epi16(vq31prod8ACE, vq31prod9BDF, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
         const __m256i vrem89ABCDEF =
           _mm256_add_epi32(_mm256_and_si256(vq31prod89ABCDEF, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod89ABCDEF));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
         vacc89ABCDEF =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod89ABCDEF, vshift), _mm256_cmpgt_epi32(vrem89ABCDEF, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
         __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc89ABCDEF), _mm256_extracti128_si256(vacc89ABCDEF, 1)), voutput_zero_point);
 
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
 
         __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF);
         vout0123456789ABCDEF = _mm_min_epi8(_mm_max_epi8(vout0123456789ABCDEF, voutput_min), voutput_max);
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c
index 8e3923a..cc5dcf2 100644
--- a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx2-mul32.c
@@ -223,8 +223,8 @@
 
       w = (const void*) ((uintptr_t) w + 32 * sizeof(int32_t) + 288 * sizeof(int8_t));
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
       const __m256i vacc9BDF = _mm256_shuffle_epi32(vacc89ABCDEF, _MM_SHUFFLE(3, 3, 1, 1));
@@ -254,7 +254,7 @@
       const __m256i vq31prodGHIJKLMN = _mm256_blend_epi16(vq31prodGIKM, vq31prodHJLN, 0xCC);
       const __m256i vq31prodOPQRSTUV = _mm256_blend_epi16(vq31prodOQSU, vq31prodPRTV, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
       const __m256i vrem89ABCDEF =
@@ -264,8 +264,8 @@
       const __m256i vremOPQRSTUV =
         _mm256_add_epi32(_mm256_and_si256(vq31prodOPQRSTUV, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prodOPQRSTUV));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
       vacc89ABCDEF =
@@ -275,15 +275,15 @@
       vaccOPQRSTUV =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prodOPQRSTUV, vshift), _mm256_cmpgt_epi32(vremOPQRSTUV, vremainder_threshold));
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
       __m256i vout012389AB4567CDEF = _mm256_adds_epi16(_mm256_packs_epi32(vacc01234567, vacc89ABCDEF), voutput_zero_point);
       __m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), voutput_zero_point);
 
       __m128i vout0123456789ABCDEF = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout012389AB4567CDEF), _mm256_extracti128_si256(vout012389AB4567CDEF, 1)), _MM_SHUFFLE(3, 1, 2, 0));
       __m128i voutGHIJKLMNOPQRSTUV = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(voutGHIJOPQRKLMNSTUV), _mm256_extracti128_si256(voutGHIJOPQRKLMNSTUV, 1)), _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456789ABCDEF = _mm_max_epi8(vout0123456789ABCDEF, voutput_min);
       vout0123456789ABCDEF = _mm_min_epi8(vout0123456789ABCDEF, voutput_max);
       voutGHIJKLMNOPQRSTUV = _mm_max_epi8(voutGHIJKLMNOPQRSTUV, voutput_min);
@@ -356,8 +356,8 @@
         w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
         k += 8;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -369,21 +369,21 @@
 
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
         vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
         vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx512skx-mul32.c b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx512skx-mul32.c
index 900c18b..9db9dbf 100644
--- a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx512skx-mul32.c
+++ b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-avx512skx-mul32.c
@@ -31,14 +31,14 @@
   assert(output_width != 0);
 
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
-  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.output_zero_point));
-  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.output_min));
-  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
+  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point));
+  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
   const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
 
   do {
diff --git a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-neon-mul16.c
index 065b9fb..1e37af6 100644
--- a/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up32x9-minmax-gemmlowp-neon-mul16.c
@@ -29,12 +29,12 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
-  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+  const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
+  const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
-  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
-  const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-  const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
+  const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+  const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up4x25-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up4x25-minmax-gemmlowp-scalar.c
index b4232a6..850ca9b 100644
--- a/src/qs8-dwconv/gen/up4x25-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up4x25-minmax-gemmlowp-scalar.c
@@ -28,14 +28,14 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32_t vmultiplier = params->scalar.multiplier;
+  const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
   const int64_t vq31rounding = INT64_C(0x40000000);
-  const int32_t vremainder_mask = params->scalar.remainder_mask;
-  const uint32_t vshift = params->scalar.shift;
-  const int32_t vremainder_threshold = params->scalar.remainder_threshold;
-  const int32_t vout_min = params->scalar.output_min_less_zero_point;
-  const int32_t vout_max = params->scalar.output_max_less_zero_point;
-  const int32_t voutput_zero_point = params->scalar.output_zero_point;
+  const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
+  const uint32_t vshift = params->gemmlowp_scalar.shift;
+  const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
+  const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
+  const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
+  const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up4x9-minmax-gemmlowp-scalar.c b/src/qs8-dwconv/gen/up4x9-minmax-gemmlowp-scalar.c
index 402518f..b6da6c7 100644
--- a/src/qs8-dwconv/gen/up4x9-minmax-gemmlowp-scalar.c
+++ b/src/qs8-dwconv/gen/up4x9-minmax-gemmlowp-scalar.c
@@ -28,14 +28,14 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32_t vmultiplier = params->scalar.multiplier;
+  const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
   const int64_t vq31rounding = INT64_C(0x40000000);
-  const int32_t vremainder_mask = params->scalar.remainder_mask;
-  const uint32_t vshift = params->scalar.shift;
-  const int32_t vremainder_threshold = params->scalar.remainder_threshold;
-  const int32_t vout_min = params->scalar.output_min_less_zero_point;
-  const int32_t vout_max = params->scalar.output_max_less_zero_point;
-  const int32_t voutput_zero_point = params->scalar.output_zero_point;
+  const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
+  const uint32_t vshift = params->gemmlowp_scalar.shift;
+  const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
+  const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
+  const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
+  const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c b/src/qs8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c
new file mode 100644
index 0000000..a8af432
--- /dev/null
+++ b/src/qs8-dwconv/gen/up8x25-minmax-fp32-avx2-mul32.c
@@ -0,0 +1,501 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul32.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    const int8_t* i9 = input[9];
+    assert(i9 != NULL);
+    if XNN_UNPREDICTABLE(i9 != zero) {
+      i9 = (const int8_t*) ((uintptr_t) i9 + input_offset);
+    }
+    const int8_t* i10 = input[10];
+    assert(i10 != NULL);
+    if XNN_UNPREDICTABLE(i10 != zero) {
+      i10 = (const int8_t*) ((uintptr_t) i10 + input_offset);
+    }
+    const int8_t* i11 = input[11];
+    assert(i11 != NULL);
+    if XNN_UNPREDICTABLE(i11 != zero) {
+      i11 = (const int8_t*) ((uintptr_t) i11 + input_offset);
+    }
+    const int8_t* i12 = input[12];
+    assert(i12 != NULL);
+    if XNN_UNPREDICTABLE(i12 != zero) {
+      i12 = (const int8_t*) ((uintptr_t) i12 + input_offset);
+    }
+    const int8_t* i13 = input[13];
+    assert(i13 != NULL);
+    if XNN_UNPREDICTABLE(i13 != zero) {
+      i13 = (const int8_t*) ((uintptr_t) i13 + input_offset);
+    }
+    const int8_t* i14 = input[14];
+    assert(i14 != NULL);
+    if XNN_UNPREDICTABLE(i14 != zero) {
+      i14 = (const int8_t*) ((uintptr_t) i14 + input_offset);
+    }
+    const int8_t* i15 = input[15];
+    assert(i15 != NULL);
+    if XNN_UNPREDICTABLE(i15 != zero) {
+      i15 = (const int8_t*) ((uintptr_t) i15 + input_offset);
+    }
+    const int8_t* i16 = input[16];
+    assert(i16 != NULL);
+    if XNN_UNPREDICTABLE(i16 != zero) {
+      i16 = (const int8_t*) ((uintptr_t) i16 + input_offset);
+    }
+    const int8_t* i17 = input[17];
+    assert(i17 != NULL);
+    if XNN_UNPREDICTABLE(i17 != zero) {
+      i17 = (const int8_t*) ((uintptr_t) i17 + input_offset);
+    }
+    const int8_t* i18 = input[18];
+    assert(i18 != NULL);
+    if XNN_UNPREDICTABLE(i18 != zero) {
+      i18 = (const int8_t*) ((uintptr_t) i18 + input_offset);
+    }
+    const int8_t* i19 = input[19];
+    assert(i19 != NULL);
+    if XNN_UNPREDICTABLE(i19 != zero) {
+      i19 = (const int8_t*) ((uintptr_t) i19 + input_offset);
+    }
+    const int8_t* i20 = input[20];
+    assert(i20 != NULL);
+    if XNN_UNPREDICTABLE(i20 != zero) {
+      i20 = (const int8_t*) ((uintptr_t) i20 + input_offset);
+    }
+    const int8_t* i21 = input[21];
+    assert(i21 != NULL);
+    if XNN_UNPREDICTABLE(i21 != zero) {
+      i21 = (const int8_t*) ((uintptr_t) i21 + input_offset);
+    }
+    const int8_t* i22 = input[22];
+    assert(i22 != NULL);
+    if XNN_UNPREDICTABLE(i22 != zero) {
+      i22 = (const int8_t*) ((uintptr_t) i22 + input_offset);
+    }
+    const int8_t* i23 = input[23];
+    assert(i23 != NULL);
+    if XNN_UNPREDICTABLE(i23 != zero) {
+      i23 = (const int8_t*) ((uintptr_t) i23 + input_offset);
+    }
+    const int8_t* i24 = input[24];
+    assert(i24 != NULL);
+    if XNN_UNPREDICTABLE(i24 != zero) {
+      i24 = (const int8_t*) ((uintptr_t) i24 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 8; c -= 8) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+      const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+      const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      i0 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+      const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+      const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+      i1 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+      const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+      const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      i2 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+      const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+      const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+      i3 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+      const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+      const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      i4 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+      const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+      const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+      i5 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+      const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+      const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      i6 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+      const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+      const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+      i7 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+      const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+      const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      i8 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+      const __m256i vi9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i9));
+      const __m256i vk9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t))));
+      i9 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi9x01234567, vk9x01234567));
+
+      const __m256i vi10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i10));
+      const __m256i vk10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+      i10 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi10x01234567, vk10x01234567));
+
+      const __m256i vi11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i11));
+      const __m256i vk11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 88 * sizeof(int8_t))));
+      i11 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi11x01234567, vk11x01234567));
+
+      const __m256i vi12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i12));
+      const __m256i vk12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+      i12 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi12x01234567, vk12x01234567));
+
+      const __m256i vi13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i13));
+      const __m256i vk13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 104 * sizeof(int8_t))));
+      i13 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi13x01234567, vk13x01234567));
+
+      const __m256i vi14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i14));
+      const __m256i vk14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+      i14 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi14x01234567, vk14x01234567));
+
+      const __m256i vi15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i15));
+      const __m256i vk15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 120 * sizeof(int8_t))));
+      i15 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi15x01234567, vk15x01234567));
+
+      const __m256i vi16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i16));
+      const __m256i vk16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+      i16 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi16x01234567, vk16x01234567));
+
+      const __m256i vi17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i17));
+      const __m256i vk17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 136 * sizeof(int8_t))));
+      i17 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi17x01234567, vk17x01234567));
+
+      const __m256i vi18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i18));
+      const __m256i vk18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+      i18 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi18x01234567, vk18x01234567));
+
+      const __m256i vi19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i19));
+      const __m256i vk19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 152 * sizeof(int8_t))));
+      i19 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi19x01234567, vk19x01234567));
+
+      const __m256i vi20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i20));
+      const __m256i vk20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+      i20 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi20x01234567, vk20x01234567));
+
+      const __m256i vi21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i21));
+      const __m256i vk21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 168 * sizeof(int8_t))));
+      i21 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi21x01234567, vk21x01234567));
+
+      const __m256i vi22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i22));
+      const __m256i vk22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+      i22 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi22x01234567, vk22x01234567));
+
+      const __m256i vi23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i23));
+      const __m256i vk23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 184 * sizeof(int8_t))));
+      i23 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi23x01234567, vk23x01234567));
+
+      const __m256i vi24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i24));
+      const __m256i vk24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+      i24 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi24x01234567, vk24x01234567));
+
+      w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 200 * sizeof(int8_t));
+
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+      __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+      __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+      vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+
+      _mm_storel_epi64((__m128i*) output, vout0123456701234567);
+      output += 8;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+        const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+        const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+        const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+        const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+        const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+        const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+        const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+        const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+        const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+        const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+        const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+        const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+        const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+        const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+        const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+        const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+        const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+        const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+        const __m256i vi9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i9));
+        const __m256i vk9x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi9x01234567, vk9x01234567));
+
+        const __m256i vi10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i10));
+        const __m256i vk10x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 80 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi10x01234567, vk10x01234567));
+
+        const __m256i vi11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i11));
+        const __m256i vk11x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 88 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi11x01234567, vk11x01234567));
+
+        const __m256i vi12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i12));
+        const __m256i vk12x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 96 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi12x01234567, vk12x01234567));
+
+        const __m256i vi13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i13));
+        const __m256i vk13x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 104 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi13x01234567, vk13x01234567));
+
+        const __m256i vi14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i14));
+        const __m256i vk14x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 112 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi14x01234567, vk14x01234567));
+
+        const __m256i vi15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i15));
+        const __m256i vk15x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 120 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi15x01234567, vk15x01234567));
+
+        const __m256i vi16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i16));
+        const __m256i vk16x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 128 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi16x01234567, vk16x01234567));
+
+        const __m256i vi17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i17));
+        const __m256i vk17x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 136 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi17x01234567, vk17x01234567));
+
+        const __m256i vi18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i18));
+        const __m256i vk18x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 144 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi18x01234567, vk18x01234567));
+
+        const __m256i vi19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i19));
+        const __m256i vk19x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 152 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi19x01234567, vk19x01234567));
+
+        const __m256i vi20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i20));
+        const __m256i vk20x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 160 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi20x01234567, vk20x01234567));
+
+        const __m256i vi21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i21));
+        const __m256i vk21x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 168 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi21x01234567, vk21x01234567));
+
+        const __m256i vi22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i22));
+        const __m256i vk22x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 176 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi22x01234567, vk22x01234567));
+
+        const __m256i vi23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i23));
+        const __m256i vk23x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 184 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi23x01234567, vk23x01234567));
+
+        const __m256i vi24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i24));
+        const __m256i vk24x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 192 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi24x01234567, vk24x01234567));
+
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, _mm256_load_ps(params->fp32_avx2.scale));
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+        __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+
+        if (c & 4) {
+          *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
+          vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32);
+          output += 4;
+        }
+        if (c & 2) {
+          *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0);
+          vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16);
+          output += 2;
+        }
+        if (c & 1) {
+          *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0);
+          output += 1;
+        }
+      }
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul16.c
index 570bc8f..185b124 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul16.c
@@ -491,8 +491,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 200 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -509,29 +509,29 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
 
       _mm_storel_epi64((__m128i*) output, vout0123456701234567);
@@ -844,8 +844,8 @@
         vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp24x01234567lo, vp24x01234567hi));
 
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -864,27 +864,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if (c & 4) {
           *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul32.c
index 5f0c9bd..d9aa725 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx-mul32.c
@@ -392,8 +392,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 200 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -410,24 +410,24 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
@@ -594,8 +594,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -607,21 +607,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c
index 52c1b39..7734857 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-avx2-mul32.c
@@ -315,8 +315,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 200 * sizeof(int8_t));
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -328,22 +328,22 @@
 
       const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
 
@@ -481,8 +481,8 @@
         vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi24x01234567, vk24x01234567));
 
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -494,21 +494,21 @@
 
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
         vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
         vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-neon-mul16.c
index 7c4e207..8a613da 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-neon-mul16.c
@@ -29,12 +29,12 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
-  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+  const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
+  const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
-  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
-  const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-  const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
+  const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+  const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse2-mul16.c
index ab59747..de6b71d 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse2-mul16.c
@@ -491,8 +491,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 200 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -530,26 +530,26 @@
       const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
@@ -865,8 +865,8 @@
         vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp24x01234567lo, vp24x01234567hi));
 
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -905,24 +905,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul16.c
index 13c53f9..d06566d 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul16.c
@@ -491,8 +491,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 200 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -509,29 +509,29 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
 
       _mm_storel_epi64((__m128i*) output, vout0123456701234567);
@@ -844,8 +844,8 @@
         vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp24x01234567lo, vp24x01234567hi));
 
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -864,27 +864,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if (c & 4) {
           *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul32.c
index 8b636e3..fba3b5e 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-sse41-mul32.c
@@ -392,8 +392,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 200 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -410,24 +410,24 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
@@ -594,8 +594,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -607,21 +607,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-ssse3-mul16.c
index 8e54b28..9d0875d 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-ssse3-mul16.c
@@ -491,8 +491,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 200 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -530,26 +530,26 @@
       const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
@@ -865,8 +865,8 @@
         vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp24x01234567lo, vp24x01234567hi));
 
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -905,24 +905,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-wasmsimd-mul16.c
index 0c39bd3..1e757fa 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-wasmsimd-mul16.c
@@ -399,8 +399,8 @@
       const v128_t vacc45 = wasm_v32x4_shuffle(vacc4567, vsign4567, 0, 4, 1, 5);
       const v128_t vacc67 = wasm_v32x4_shuffle(vacc4567, vsign4567, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -409,20 +409,20 @@
       const v128_t vq31prod0123 = wasm_v32x4_shuffle(vprod01, vprod23, 1, 3, 5, 7);
       const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);
 
       *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0);
@@ -643,8 +643,8 @@
       const v128_t vacc45 = wasm_v32x4_shuffle(vacc4567, vsign4567, 0, 4, 1, 5);
       const v128_t vacc67 = wasm_v32x4_shuffle(vacc4567, vsign4567, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -653,20 +653,20 @@
       const v128_t vq31prod0123 = wasm_v32x4_shuffle(vprod01, vprod23, 1, 3, 5, 7);
       const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);
 
       if (c & 4) {
diff --git a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-xop-mul32.c
index 777be2d..a3d67a8 100644
--- a/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up8x25-minmax-gemmlowp-xop-mul32.c
@@ -397,8 +397,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 200 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -415,24 +415,24 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
@@ -599,8 +599,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -612,21 +612,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c b/src/qs8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c
new file mode 100644
index 0000000..fb18b6c
--- /dev/null
+++ b/src/qs8-dwconv/gen/up8x9-minmax-fp32-avx2-mul32.c
@@ -0,0 +1,245 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-dwconv/unipass-avx2-mul32.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/dwconv.h>
+
+
+void xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32(
+    size_t channels,
+    size_t output_width,
+    const int8_t** input,
+    const void* weights,
+    int8_t* output,
+    size_t input_stride,
+    size_t output_increment,
+    size_t input_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN
+{
+  assert(channels != 0);
+  assert(output_width != 0);
+
+  do {
+    const int8_t* i0 = input[0];
+    assert(i0 != NULL);
+    if XNN_UNPREDICTABLE(i0 != zero) {
+      i0 = (const int8_t*) ((uintptr_t) i0 + input_offset);
+    }
+    const int8_t* i1 = input[1];
+    assert(i1 != NULL);
+    if XNN_UNPREDICTABLE(i1 != zero) {
+      i1 = (const int8_t*) ((uintptr_t) i1 + input_offset);
+    }
+    const int8_t* i2 = input[2];
+    assert(i2 != NULL);
+    if XNN_UNPREDICTABLE(i2 != zero) {
+      i2 = (const int8_t*) ((uintptr_t) i2 + input_offset);
+    }
+    const int8_t* i3 = input[3];
+    assert(i3 != NULL);
+    if XNN_UNPREDICTABLE(i3 != zero) {
+      i3 = (const int8_t*) ((uintptr_t) i3 + input_offset);
+    }
+    const int8_t* i4 = input[4];
+    assert(i4 != NULL);
+    if XNN_UNPREDICTABLE(i4 != zero) {
+      i4 = (const int8_t*) ((uintptr_t) i4 + input_offset);
+    }
+    const int8_t* i5 = input[5];
+    assert(i5 != NULL);
+    if XNN_UNPREDICTABLE(i5 != zero) {
+      i5 = (const int8_t*) ((uintptr_t) i5 + input_offset);
+    }
+    const int8_t* i6 = input[6];
+    assert(i6 != NULL);
+    if XNN_UNPREDICTABLE(i6 != zero) {
+      i6 = (const int8_t*) ((uintptr_t) i6 + input_offset);
+    }
+    const int8_t* i7 = input[7];
+    assert(i7 != NULL);
+    if XNN_UNPREDICTABLE(i7 != zero) {
+      i7 = (const int8_t*) ((uintptr_t) i7 + input_offset);
+    }
+    const int8_t* i8 = input[8];
+    assert(i8 != NULL);
+    if XNN_UNPREDICTABLE(i8 != zero) {
+      i8 = (const int8_t*) ((uintptr_t) i8 + input_offset);
+    }
+    input = (const int8_t**) ((uintptr_t) input + input_stride);
+
+    size_t c = channels;
+    const void* w = weights;
+    for (; c >= 8; c -= 8) {
+      __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+      const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+      const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+      i0 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+      const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+      const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+      i1 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+      const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+      const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+      i2 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+      const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+      const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+      i3 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+      const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+      const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+      i4 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+      const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+      const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+      i5 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+      const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+      const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+      i6 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+      const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+      const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+      i7 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+      const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+      const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+      i8 += 8;
+
+      vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+      w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t));
+
+      __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      vscaled01234567 = _mm256_mul_ps(vscaled01234567, vscale);
+
+      vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+      __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+      __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+      vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+      vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+
+      _mm_storel_epi64((__m128i*) output, vout0123456701234567);
+      output += 8;
+    }
+    if XNN_UNLIKELY(c != 0) {
+      {
+        __m256i vacc01234567 = _mm256_loadu_si256((const __m256i*) w);
+
+
+        const __m256i vi0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i0));
+        const __m256i vk0x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 0 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi0x01234567, vk0x01234567));
+
+        const __m256i vi1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i1));
+        const __m256i vk1x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 8 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi1x01234567, vk1x01234567));
+
+        const __m256i vi2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i2));
+        const __m256i vk2x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 16 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi2x01234567, vk2x01234567));
+
+        const __m256i vi3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i3));
+        const __m256i vk3x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 24 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi3x01234567, vk3x01234567));
+
+        const __m256i vi4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i4));
+        const __m256i vk4x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 32 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi4x01234567, vk4x01234567));
+
+        const __m256i vi5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i5));
+        const __m256i vk5x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 40 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi5x01234567, vk5x01234567));
+
+        const __m256i vi6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i6));
+        const __m256i vk6x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 48 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi6x01234567, vk6x01234567));
+
+        const __m256i vi7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i7));
+        const __m256i vk7x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 56 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi7x01234567, vk7x01234567));
+
+        const __m256i vi8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) i8));
+        const __m256i vk8x01234567 = _mm256_cvtepi8_epi32(_mm_loadl_epi64((const __m128i*) ((uintptr_t) w + 8 * sizeof(int32_t) + 64 * sizeof(int8_t))));
+
+        vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
+
+
+        __m256 vscaled01234567 = _mm256_cvtepi32_ps(vacc01234567);
+        vscaled01234567 = _mm256_mul_ps(vscaled01234567, _mm256_load_ps(params->fp32_avx2.scale));
+        vacc01234567 = _mm256_cvtps_epi32(vscaled01234567);
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->fp32_avx2.output_zero_point);
+        __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
+
+        __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->fp32_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->fp32_avx2.output_min);
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
+
+        if (c & 4) {
+          *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
+          vout0123456701234567 = _mm_srli_epi64(vout0123456701234567, 32);
+          output += 4;
+        }
+        if (c & 2) {
+          *((uint16_t*) output) = (uint16_t) _mm_extract_epi16(vout0123456701234567, 0);
+          vout0123456701234567 = _mm_srli_epi32(vout0123456701234567, 16);
+          output += 2;
+        }
+        if (c & 1) {
+          *output = (int8_t) _mm_extract_epi8(vout0123456701234567, 0);
+          output += 1;
+        }
+      }
+    }
+
+    output = (int8_t*) ((uintptr_t) output + output_increment);
+  } while (--output_width != 0);
+}
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul16.c
index 1f981e8..5ab356a 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul16.c
@@ -203,8 +203,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -221,29 +221,29 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
 
       _mm_storel_epi64((__m128i*) output, vout0123456701234567);
@@ -364,8 +364,8 @@
         vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi));
 
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -384,27 +384,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if (c & 4) {
           *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul32.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul32.c
index ae1c6ca..d1362f2 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul32.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx-mul32.c
@@ -168,8 +168,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -186,24 +186,24 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
@@ -274,8 +274,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -287,21 +287,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
index 9993035..5e039fe 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-avx2-mul32.c
@@ -139,8 +139,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t));
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
       const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -152,22 +152,22 @@
 
       const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
       const __m256i vrem01234567 =
         _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
       vacc01234567 =
         _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
 
@@ -225,8 +225,8 @@
         vacc01234567 = _mm256_add_epi32(vacc01234567, _mm256_mullo_epi32(vi8x01234567, vk8x01234567));
 
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
         const __m256i vacc1357 = _mm256_shuffle_epi32(vacc01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -238,21 +238,21 @@
 
         const __m256i vq31prod01234567 = _mm256_blend_epi16(vq31prod0246, vq31prod1357, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
         const __m256i vrem01234567 =
           _mm256_add_epi32(_mm256_and_si256(vq31prod01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod01234567));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
         vacc01234567 =
           _mm256_sub_epi32(_mm256_sra_epi32(vq31prod01234567, vshift), _mm256_cmpgt_epi32(vrem01234567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc01234567), _mm256_extracti128_si256(vacc01234567, 1)), voutput_zero_point);
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.output_min);
         vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
         vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c
index 9ee7b59..dfd5e06 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-neon-mul16.c
@@ -29,12 +29,12 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
-  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+  const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
+  const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
-  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
-  const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-  const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
+  const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+  const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
   do {
     const int8_t* i0 = input[0];
     assert(i0 != NULL);
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse2-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse2-mul16.c
index 862adf9..0f87944 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse2-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse2-mul16.c
@@ -203,8 +203,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -242,26 +242,26 @@
       const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
@@ -385,8 +385,8 @@
         vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi));
 
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -425,24 +425,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul16.c
index d844f6f..f6e8b72 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul16.c
@@ -203,8 +203,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -221,29 +221,29 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
 
       _mm_storel_epi64((__m128i*) output, vout0123456701234567);
@@ -364,8 +364,8 @@
         vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi));
 
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
         const __m128i vacc57 = _mm_shuffle_epi32(vacc4567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -384,27 +384,27 @@
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
         const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
-        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123456701234567 = _mm_max_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123456701234567 = _mm_min_epi8(vout0123456701234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if (c & 4) {
           *((uint32_t*) output) = (uint32_t) _mm_cvtsi128_si32(vout0123456701234567);
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul32.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul32.c
index 70e4c87..ff41ad6 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul32.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-sse41-mul32.c
@@ -168,8 +168,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -186,24 +186,24 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
@@ -274,8 +274,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -287,21 +287,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-ssse3-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-ssse3-mul16.c
index 1a7df5c..717701a 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-ssse3-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-ssse3-mul16.c
@@ -203,8 +203,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
       const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
       const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -242,26 +242,26 @@
       const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
       const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
       vout01234567 = _mm_max_epi16(vout01234567, voutput_min);
 
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       vout01234567 = _mm_min_epi16(vout01234567, voutput_max);
 
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
@@ -385,8 +385,8 @@
         vacc4567 = _mm_add_epi32(vacc4567, _mm_unpackhi_epi16(vp8x01234567lo, vp8x01234567hi));
 
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
         const __m128i vnmask0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0123);
         const __m128i vnmask4567 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc4567);
@@ -425,24 +425,24 @@
         const __m128i vq31prod0123 = _mm_shuffle_epi32(vq31prod0213, _MM_SHUFFLE(3, 1, 2, 0));
         const __m128i vq31prod4567 = _mm_shuffle_epi32(vq31prod4657, _MM_SHUFFLE(3, 1, 2, 0));
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
         const __m128i vrem4567 =
           _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
         vacc4567 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
         __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_min));
-        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->sse2.output_max));
+        vout01234567 = _mm_max_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+        vout01234567 = _mm_min_epi16(vout01234567, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
 
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-wasmsimd-mul16.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-wasmsimd-mul16.c
index cef3330..de37634 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-wasmsimd-mul16.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-wasmsimd-mul16.c
@@ -175,8 +175,8 @@
       const v128_t vacc45 = wasm_v32x4_shuffle(vacc4567, vsign4567, 0, 4, 1, 5);
       const v128_t vacc67 = wasm_v32x4_shuffle(vacc4567, vsign4567, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -185,20 +185,20 @@
       const v128_t vq31prod0123 = wasm_v32x4_shuffle(vprod01, vprod23, 1, 3, 5, 7);
       const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);
 
       *((double*) output) = wasm_f64x2_extract_lane(vout0123456701234567, 0);
@@ -291,8 +291,8 @@
       const v128_t vacc45 = wasm_v32x4_shuffle(vacc4567, vsign4567, 0, 4, 1, 5);
       const v128_t vacc67 = wasm_v32x4_shuffle(vacc4567, vsign4567, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
       const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
       const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
@@ -301,20 +301,20 @@
       const v128_t vq31prod0123 = wasm_v32x4_shuffle(vprod01, vprod23, 1, 3, 5, 7);
       const v128_t vq31prod4567 = wasm_v32x4_shuffle(vprod45, vprod67, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0123, vremainder_mask), wasm_i32x4_shr(vq31prod0123, 31));
       const v128_t vrem4567 = wasm_i32x4_add(wasm_v128_and(vq31prod4567, vremainder_mask), wasm_i32x4_shr(vq31prod4567, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0123, vshift), wasm_i32x4_gt(vrem0123, vthreshold));
       vacc4567 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod4567, vshift), wasm_i32x4_gt(vrem4567, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout01234567 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0123, vacc4567), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout0123456701234567 = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout01234567, vout01234567), voutput_min), voutput_max);
 
       if (c & 4) {
diff --git a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-xop-mul32.c b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-xop-mul32.c
index 943bea2..c1bede9 100644
--- a/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-xop-mul32.c
+++ b/src/qs8-dwconv/gen/up8x9-minmax-gemmlowp-xop-mul32.c
@@ -173,8 +173,8 @@
 
       w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t) + 72 * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
       const __m128i vprod02 = _mm_add_epi64(_mm_mul_epi32(vacc0123, vmultiplier), vrounding);
@@ -191,24 +191,24 @@
       const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
       const __m128i vq31prod4567 = _mm_blend_epi16(vq31prod46, vq31prod57, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       const __m128i vrem0123 =
         _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
       const __m128i vrem4567 =
         _mm_add_epi32(_mm_and_si128(vq31prod4567, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod4567));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       vacc0123 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
       vacc4567 =
         _mm_sub_epi32(_mm_sra_epi32(vq31prod4567, vshift), _mm_cmpgt_epi32(vrem4567, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       __m128i vout01234567 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc4567), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       __m128i vout0123456701234567 = _mm_packs_epi16(vout01234567, vout01234567);
       vout0123456701234567 = _mm_max_epi8(vout0123456701234567, voutput_min);
       vout0123456701234567 = _mm_min_epi8(vout0123456701234567, voutput_max);
@@ -279,8 +279,8 @@
         w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
         k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc13 = _mm_shuffle_epi32(vacc0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -292,21 +292,21 @@
 
         const __m128i vq31prod0123 = _mm_blend_epi16(vq31prod02, vq31prod13, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem0123 =
           _mm_add_epi32(_mm_and_si128(vq31prod0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0123));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc0123 =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod0123, vshift), _mm_cmpgt_epi32(vrem0123, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0123, vacc0123), voutput_zero_point);
 
         vout0123 = _mm_packs_epi16(vout0123, vout0123);
-        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout0123 = _mm_max_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout0123 = _mm_min_epi8(vout0123, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         if XNN_LIKELY(c >= 4) {
           _mm_storeu_si32(output, vout0123);
diff --git a/src/qs8-dwconv/unipass-avx2-mul16.c.in b/src/qs8-dwconv/unipass-avx2-mul16.c.in
index a6b5be5..e01b67e 100644
--- a/src/qs8-dwconv/unipass-avx2-mul16.c.in
+++ b/src/qs8-dwconv/unipass-avx2-mul16.c.in
@@ -4,6 +4,7 @@
 // LICENSE file in the root directory of this source tree.
 
 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+$assert REQUANTIZATION in ["GEMMLOWP", "FP32"]
 $assert CHANNEL_TILE % 16 == 0
 $assert CHANNEL_TILE >= 16
 $assert KERNEL_TILE >= 2
@@ -14,7 +15,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx2_mul16(
+void xnn_qs8_dwconv_minmax_${REQUANTIZATION.lower()}_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx2_mul16(
     size_t channels,
     size_t output_width,
     const int8_t** input,
@@ -62,47 +63,57 @@
           vacc${ABC[C+8:C+16]} = _mm256_add_epi32(vacc${ABC[C+8:C+16]}, _mm256_cvtepi16_epi32(vprod${K}x${ABC[C+8:C+16]}));
 
       w = (const void*) ((uintptr_t) w + ${CHANNEL_TILE} * sizeof(int32_t) + ${KERNEL_TILE * CHANNEL_TILE} * sizeof(int8_t));
+      $if REQUANTIZATION == "GEMMLOWP":
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vacc${ABC[C+1:C+8:2]} = _mm256_shuffle_epi32(vacc${ABC[C:C+8]}, _MM_SHUFFLE(3, 3, 1, 1));
 
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vacc${ABC[C+1:C+8:2]} = _mm256_shuffle_epi32(vacc${ABC[C:C+8]}, _MM_SHUFFLE(3, 3, 1, 1));
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vprod${ABC[C:C+8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[C:C+8]}, vmultiplier), vrounding);
+          const __m256i vprod${ABC[C+1:C+8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[C+1:C+8:2]}, vmultiplier), vrounding);
 
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vprod${ABC[C:C+8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[C:C+8]}, vmultiplier), vrounding);
-        const __m256i vprod${ABC[C+1:C+8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[C+1:C+8:2]}, vmultiplier), vrounding);
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vq31prod${ABC[C:C+8:2]} = _mm256_srli_epi64(vprod${ABC[C:C+8:2]}, 31);
+          const __m256i vq31prod${ABC[C+1:C+8:2]} = _mm256_add_epi64(vprod${ABC[C+1:C+8:2]}, vprod${ABC[C+1:C+8:2]});
 
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vq31prod${ABC[C:C+8:2]} = _mm256_srli_epi64(vprod${ABC[C:C+8:2]}, 31);
-        const __m256i vq31prod${ABC[C+1:C+8:2]} = _mm256_add_epi64(vprod${ABC[C+1:C+8:2]}, vprod${ABC[C+1:C+8:2]});
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vq31prod${ABC[C:C+8]} = _mm256_blend_epi16(vq31prod${ABC[C:C+8:2]}, vq31prod${ABC[C+1:C+8:2]}, 0xCC);
 
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vq31prod${ABC[C:C+8]} = _mm256_blend_epi16(vq31prod${ABC[C:C+8:2]}, vq31prod${ABC[C+1:C+8:2]}, 0xCC);
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vrem${ABC[C:C+8]} =
+            _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[C:C+8]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[C:C+8]}));
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vrem${ABC[C:C+8]} =
-          _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[C:C+8]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[C:C+8]}));
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        $if CHANNEL_TILE > 8:
+          const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
+        $else:
+          const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
+        $for C in range(0, CHANNEL_TILE, 8):
+          vacc${ABC[C:C+8]} =
+            _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[C:C+8]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[C:C+8]}, vremainder_threshold));
+      $elif REQUANTIZATION == "FP32":
+        $for C in range(0, CHANNEL_TILE, 8):
+          __m256 vscaled${ABC[C:C+8]} = _mm256_cvtepi32_ps(vacc${ABC[C:C+8]});
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      $if CHANNEL_TILE > 8:
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
-      $else:
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
-      $for C in range(0, CHANNEL_TILE, 8):
-        vacc${ABC[C:C+8]} =
-          _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[C:C+8]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[C:C+8]}, vremainder_threshold));
+        const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+        $for C in range(0, CHANNEL_TILE, 8):
+          vscaled${ABC[C:C+8]} = _mm256_mul_ps(vscaled${ABC[C:C+8]}, vscale);
 
-      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+        $for C in range(0, CHANNEL_TILE, 8):
+          vacc${ABC[C:C+8]} = _mm256_cvtps_epi32(vscaled${ABC[C:C+8]});
+
+      const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->${REQUANTIZATION.lower()}_avx2.output_zero_point);
       $for C in range(0, CHANNEL_TILE, 16):
         const __m256i vout${ABC[C:C+4]}${ABC[C+8:C+12]}${ABC[C+4:C+8]}${ABC[C+12:C+16]} = _mm256_adds_epi16(_mm256_packs_epi32(vacc${ABC[C:C+8]}, vacc${ABC[C+8:C+16]}), voutput_zero_point);
 
       $for C in range(0, CHANNEL_TILE, 16):
         __m128i vout${ABC[C:C+16]} = _mm_shuffle_epi32(_mm_packs_epi16(_mm256_castsi256_si128(vout${ABC[C:C+4]}${ABC[C+8:C+12]}${ABC[C+4:C+8]}${ABC[C+12:C+16]}), _mm256_extracti128_si256(vout${ABC[C:C+4]}${ABC[C+8:C+12]}${ABC[C+4:C+8]}${ABC[C+12:C+16]}, 1)), _MM_SHUFFLE(3, 1, 2, 0));
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_max);
       $for C in range(0, CHANNEL_TILE, 16):
         vout${ABC[C:C+16]} = _mm_max_epi8(vout${ABC[C:C+16]}, voutput_min);
         vout${ABC[C:C+16]} = _mm_min_epi8(vout${ABC[C:C+16]}, voutput_max);
@@ -141,44 +152,55 @@
           w = (const void*) ((uintptr_t) w + 16 * sizeof(int32_t));
           k += 16;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        $if REQUANTIZATION == "GEMMLOWP":
+          const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+          const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
-        const __m256i vacc${ABC[1:8:2]} = _mm256_shuffle_epi32(vacc${ABC[0:8]}, _MM_SHUFFLE(3, 3, 1, 1));
-        const __m256i vacc${ABC[9:16:2]} = _mm256_shuffle_epi32(vacc${ABC[8:16]}, _MM_SHUFFLE(3, 3, 1, 1));
+          const __m256i vacc${ABC[1:8:2]} = _mm256_shuffle_epi32(vacc${ABC[0:8]}, _MM_SHUFFLE(3, 3, 1, 1));
+          const __m256i vacc${ABC[9:16:2]} = _mm256_shuffle_epi32(vacc${ABC[8:16]}, _MM_SHUFFLE(3, 3, 1, 1));
 
-        const __m256i vprod${ABC[0:8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[0:8]}, vmultiplier), vrounding);
-        const __m256i vprod${ABC[1:8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[1:8:2]}, vmultiplier), vrounding);
-        const __m256i vprod${ABC[8:16:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[8:16]}, vmultiplier), vrounding);
-        const __m256i vprod${ABC[9:16:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[9:16:2]}, vmultiplier), vrounding);
+          const __m256i vprod${ABC[0:8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[0:8]}, vmultiplier), vrounding);
+          const __m256i vprod${ABC[1:8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[1:8:2]}, vmultiplier), vrounding);
+          const __m256i vprod${ABC[8:16:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[8:16]}, vmultiplier), vrounding);
+          const __m256i vprod${ABC[9:16:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[9:16:2]}, vmultiplier), vrounding);
 
-        const __m256i vq31prod${ABC[0:8:2]} = _mm256_srli_epi64(vprod${ABC[0:8:2]}, 31);
-        const __m256i vq31prod${ABC[1:8:2]} = _mm256_add_epi64(vprod${ABC[1:8:2]}, vprod${ABC[1:8:2]});
-        const __m256i vq31prod${ABC[8:16:2]} = _mm256_srli_epi64(vprod${ABC[8:16:2]}, 31);
-        const __m256i vq31prod${ABC[9:16:2]} = _mm256_add_epi64(vprod${ABC[9:16:2]}, vprod${ABC[9:16:2]});
+          const __m256i vq31prod${ABC[0:8:2]} = _mm256_srli_epi64(vprod${ABC[0:8:2]}, 31);
+          const __m256i vq31prod${ABC[1:8:2]} = _mm256_add_epi64(vprod${ABC[1:8:2]}, vprod${ABC[1:8:2]});
+          const __m256i vq31prod${ABC[8:16:2]} = _mm256_srli_epi64(vprod${ABC[8:16:2]}, 31);
+          const __m256i vq31prod${ABC[9:16:2]} = _mm256_add_epi64(vprod${ABC[9:16:2]}, vprod${ABC[9:16:2]});
 
-        const __m256i vq31prod${ABC[0:8]} = _mm256_blend_epi16(vq31prod${ABC[0:8:2]}, vq31prod${ABC[1:8:2]}, 0xCC);
-        const __m256i vq31prod${ABC[8:16]} = _mm256_blend_epi16(vq31prod${ABC[8:16:2]}, vq31prod${ABC[9:16:2]}, 0xCC);
+          const __m256i vq31prod${ABC[0:8]} = _mm256_blend_epi16(vq31prod${ABC[0:8:2]}, vq31prod${ABC[1:8:2]}, 0xCC);
+          const __m256i vq31prod${ABC[8:16]} = _mm256_blend_epi16(vq31prod${ABC[8:16:2]}, vq31prod${ABC[9:16:2]}, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
-        const __m256i vrem${ABC[0:8]} =
-          _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[0:8]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[0:8]}));
-        const __m256i vrem${ABC[8:16]} =
-          _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[8:16]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[8:16]}));
+          const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
+          const __m256i vrem${ABC[0:8]} =
+            _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[0:8]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[0:8]}));
+          const __m256i vrem${ABC[8:16]} =
+            _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[8:16]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[8:16]}));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
-        vacc${ABC[0:8]} =
-          _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[0:8]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[0:8]}, vremainder_threshold));
-        vacc${ABC[8:16]} =
-          _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[8:16]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[8:16]}, vremainder_threshold));
+          const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+          const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
+          vacc${ABC[0:8]} =
+            _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[0:8]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[0:8]}, vremainder_threshold));
+          vacc${ABC[8:16]} =
+            _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[8:16]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[8:16]}, vremainder_threshold));
+        $elif REQUANTIZATION == "FP32":
+          __m256 vscaled${ABC[0:8]} = _mm256_cvtepi32_ps(vacc${ABC[0:8]});
+          __m256 vscaled${ABC[8:16]} = _mm256_cvtepi32_ps(vacc${ABC[8:16]});
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+          const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+          vscaled${ABC[0:8]} = _mm256_mul_ps(vscaled${ABC[0:8]}, vscale);
+          vscaled${ABC[8:16]} = _mm256_mul_ps(vscaled${ABC[8:16]}, vscale);
+
+          vacc${ABC[0:8]} = _mm256_cvtps_epi32(vscaled${ABC[0:8]});
+          vacc${ABC[8:16]} = _mm256_cvtps_epi32(vscaled${ABC[8:16]});
+
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_zero_point);
         __m128i vout${ABC[0:8]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[0:8]}), _mm256_extracti128_si256(vacc${ABC[0:8]}, 1)), voutput_zero_point);
         __m128i vout${ABC[8:16]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[8:16]}), _mm256_extracti128_si256(vacc${ABC[8:16]}, 1)), voutput_zero_point);
 
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_max);
 
         __m128i vout${ABC[0:16]} = _mm_packs_epi16(vout${ABC[0:8]}, vout${ABC[8:16]});
         vout${ABC[0:16]} = _mm_min_epi8(_mm_max_epi8(vout${ABC[0:16]}, voutput_min), voutput_max);
diff --git a/src/qs8-dwconv/unipass-avx2-mul32.c.in b/src/qs8-dwconv/unipass-avx2-mul32.c.in
index 8a031be..8f6d839 100644
--- a/src/qs8-dwconv/unipass-avx2-mul32.c.in
+++ b/src/qs8-dwconv/unipass-avx2-mul32.c.in
@@ -4,6 +4,7 @@
 // LICENSE file in the root directory of this source tree.
 
 $ABC = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+$assert REQUANTIZATION in ["GEMMLOWP", "FP32"]
 $assert CHANNEL_TILE % 8 == 0
 $assert CHANNEL_TILE >= 8
 $assert KERNEL_TILE >= 2
@@ -14,7 +15,7 @@
 #include <xnnpack/dwconv.h>
 
 
-void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx2_mul32(
+void xnn_qs8_dwconv_minmax_${REQUANTIZATION.lower()}_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__avx2_mul32(
     size_t channels,
     size_t output_width,
     const int8_t** input,
@@ -60,41 +61,52 @@
 
       w = (const void*) ((uintptr_t) w + ${CHANNEL_TILE} * sizeof(int32_t) + ${KERNEL_TILE * CHANNEL_TILE} * sizeof(int8_t));
 
-      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+      $if REQUANTIZATION == "GEMMLOWP":
+        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vacc${ABC[C+1:C+8:2]} = _mm256_shuffle_epi32(vacc${ABC[C:C+8]}, _MM_SHUFFLE(3, 3, 1, 1));
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vacc${ABC[C+1:C+8:2]} = _mm256_shuffle_epi32(vacc${ABC[C:C+8]}, _MM_SHUFFLE(3, 3, 1, 1));
 
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vprod${ABC[C:C+8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[C:C+8]}, vmultiplier), vrounding);
-        const __m256i vprod${ABC[C+1:C+8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[C+1:C+8:2]}, vmultiplier), vrounding);
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vprod${ABC[C:C+8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[C:C+8]}, vmultiplier), vrounding);
+          const __m256i vprod${ABC[C+1:C+8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[C+1:C+8:2]}, vmultiplier), vrounding);
 
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vq31prod${ABC[C:C+8:2]} = _mm256_srli_epi64(vprod${ABC[C:C+8:2]}, 31);
-        const __m256i vq31prod${ABC[C+1:C+8:2]} = _mm256_add_epi64(vprod${ABC[C+1:C+8:2]}, vprod${ABC[C+1:C+8:2]});
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vq31prod${ABC[C:C+8:2]} = _mm256_srli_epi64(vprod${ABC[C:C+8:2]}, 31);
+          const __m256i vq31prod${ABC[C+1:C+8:2]} = _mm256_add_epi64(vprod${ABC[C+1:C+8:2]}, vprod${ABC[C+1:C+8:2]});
 
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vq31prod${ABC[C:C+8]} = _mm256_blend_epi16(vq31prod${ABC[C:C+8:2]}, vq31prod${ABC[C+1:C+8:2]}, 0xCC);
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vq31prod${ABC[C:C+8]} = _mm256_blend_epi16(vq31prod${ABC[C:C+8:2]}, vq31prod${ABC[C+1:C+8:2]}, 0xCC);
 
-      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
-      $for C in range(0, CHANNEL_TILE, 8):
-        const __m256i vrem${ABC[C:C+8]} =
-          _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[C:C+8]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[C:C+8]}));
+        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
+        $for C in range(0, CHANNEL_TILE, 8):
+          const __m256i vrem${ABC[C:C+8]} =
+            _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[C:C+8]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[C:C+8]}));
 
-      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-      $if CHANNEL_TILE > 8:
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
-      $else:
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
-      $for C in range(0, CHANNEL_TILE, 8):
-        vacc${ABC[C:C+8]} =
-          _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[C:C+8]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[C:C+8]}, vremainder_threshold));
+        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+        $if CHANNEL_TILE > 8:
+          const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
+        $else:
+          const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
+        $for C in range(0, CHANNEL_TILE, 8):
+          vacc${ABC[C:C+8]} =
+            _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[C:C+8]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[C:C+8]}, vremainder_threshold));
+      $elif REQUANTIZATION == "FP32":
+        $for C in range(0, CHANNEL_TILE, 8):
+          __m256 vscaled${ABC[C:C+8]} = _mm256_cvtepi32_ps(vacc${ABC[C:C+8]});
+
+        const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+        $for C in range(0, CHANNEL_TILE, 8):
+          vscaled${ABC[C:C+8]} = _mm256_mul_ps(vscaled${ABC[C:C+8]}, vscale);
+
+        $for C in range(0, CHANNEL_TILE, 8):
+          vacc${ABC[C:C+8]} = _mm256_cvtps_epi32(vscaled${ABC[C:C+8]});
 
       $if CHANNEL_TILE > 8:
-        const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+        const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->${REQUANTIZATION.lower()}_avx2.output_zero_point);
       $else:
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_zero_point);
       $for C in range(0, CHANNEL_TILE, 16):
         $if C + 8 < CHANNEL_TILE:
           __m256i vout${ABC[C:C+4]}${ABC[C+8:C+12]}${ABC[C+4:C+8]}${ABC[C+12:C+16]} = _mm256_adds_epi16(_mm256_packs_epi32(vacc${ABC[C:C+8]}, vacc${ABC[C+8:C+16]}), voutput_zero_point);
@@ -109,8 +121,8 @@
         $else:
           __m128i vout${ABC[C:C+8]}${ABC[C:C+8]} = _mm_packs_epi16(vout${ABC[C:C+8]}, vout${ABC[C:C+8]});
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_max);
       $for C in range(0, CHANNEL_TILE, 16):
         $if C + 8 < CHANNEL_TILE:
           vout${ABC[C:C+16]} = _mm_max_epi8(vout${ABC[C:C+16]}, voutput_min);
@@ -155,34 +167,39 @@
           w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
           k += 8;
 
-        const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-        const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+        $if REQUANTIZATION == "GEMMLOWP":
+          const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+          const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
-        const __m256i vacc${ABC[1:8:2]} = _mm256_shuffle_epi32(vacc${ABC[0:8]}, _MM_SHUFFLE(3, 3, 1, 1));
+          const __m256i vacc${ABC[1:8:2]} = _mm256_shuffle_epi32(vacc${ABC[0:8]}, _MM_SHUFFLE(3, 3, 1, 1));
 
-        const __m256i vprod${ABC[0:8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[0:8]}, vmultiplier), vrounding);
-        const __m256i vprod${ABC[1:8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[1:8:2]}, vmultiplier), vrounding);
+          const __m256i vprod${ABC[0:8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[0:8]}, vmultiplier), vrounding);
+          const __m256i vprod${ABC[1:8:2]} = _mm256_add_epi64(_mm256_mul_epi32(vacc${ABC[1:8:2]}, vmultiplier), vrounding);
 
-        const __m256i vq31prod${ABC[0:8:2]} = _mm256_srli_epi64(vprod${ABC[0:8:2]}, 31);
-        const __m256i vq31prod${ABC[1:8:2]} = _mm256_add_epi64(vprod${ABC[1:8:2]}, vprod${ABC[1:8:2]});
+          const __m256i vq31prod${ABC[0:8:2]} = _mm256_srli_epi64(vprod${ABC[0:8:2]}, 31);
+          const __m256i vq31prod${ABC[1:8:2]} = _mm256_add_epi64(vprod${ABC[1:8:2]}, vprod${ABC[1:8:2]});
 
-        const __m256i vq31prod${ABC[0:8]} = _mm256_blend_epi16(vq31prod${ABC[0:8:2]}, vq31prod${ABC[1:8:2]}, 0xCC);
+          const __m256i vq31prod${ABC[0:8]} = _mm256_blend_epi16(vq31prod${ABC[0:8:2]}, vq31prod${ABC[1:8:2]}, 0xCC);
 
-        const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
-        const __m256i vrem${ABC[0:8]} =
-          _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[0:8]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[0:8]}));
+          const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
+          const __m256i vrem${ABC[0:8]} =
+            _mm256_add_epi32(_mm256_and_si256(vq31prod${ABC[0:8]}, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${ABC[0:8]}));
 
-        const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-        const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
-        vacc${ABC[0:8]} =
-          _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[0:8]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[0:8]}, vremainder_threshold));
+          const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+          const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
+          vacc${ABC[0:8]} =
+            _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${ABC[0:8]}, vshift), _mm256_cmpgt_epi32(vrem${ABC[0:8]}, vremainder_threshold));
+        $elif REQUANTIZATION == "FP32":
+          __m256 vscaled${ABC[0:8]} = _mm256_cvtepi32_ps(vacc${ABC[0:8]});
+          vscaled${ABC[0:8]} = _mm256_mul_ps(vscaled${ABC[0:8]}, _mm256_load_ps(params->fp32_avx2.scale));
+          vacc${ABC[0:8]} = _mm256_cvtps_epi32(vscaled${ABC[0:8]});
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->avx2.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_zero_point);
         __m128i vout${ABC[0:8]} = _mm_adds_epi16(_mm_packs_epi32(_mm256_castsi256_si128(vacc${ABC[0:8]}), _mm256_extracti128_si256(vacc${ABC[0:8]}, 1)), voutput_zero_point);
 
         __m128i vout${ABC[0:8]}${ABC[0:8]} = _mm_packs_epi16(vout${ABC[0:8]}, vout${ABC[0:8]});
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->avx2.output_max);
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->avx2.output_min);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_max);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->${REQUANTIZATION.lower()}_avx2.output_min);
         vout${ABC[0:8]}${ABC[0:8]} = _mm_min_epi8(vout${ABC[0:8]}${ABC[0:8]}, voutput_max);
         vout${ABC[0:8]}${ABC[0:8]} = _mm_max_epi8(vout${ABC[0:8]}${ABC[0:8]}, voutput_min);
 
diff --git a/src/qs8-dwconv/unipass-avx512skx-mul32.c.in b/src/qs8-dwconv/unipass-avx512skx-mul32.c.in
index 1a8cf8f..71e6ddb 100644
--- a/src/qs8-dwconv/unipass-avx512skx-mul32.c.in
+++ b/src/qs8-dwconv/unipass-avx512skx-mul32.c.in
@@ -31,20 +31,20 @@
   assert(output_width != 0);
 
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
   $if CHANNEL_TILE > 16:
-    const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.output_zero_point));
-    const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.output_min));
-    const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.output_max));
+    const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point));
+    const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+    const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
     const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 5, 1, 6, 2, 4, 0);
   $else:
-    const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_zero_point));
-    const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_min));
-    const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_max));
+    const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point));
+    const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+    const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
   do {
     $for K in range(KERNEL_TILE):
diff --git a/src/qs8-dwconv/unipass-neon-mul16.c.in b/src/qs8-dwconv/unipass-neon-mul16.c.in
index ef0a0dc..47a7832 100644
--- a/src/qs8-dwconv/unipass-neon-mul16.c.in
+++ b/src/qs8-dwconv/unipass-neon-mul16.c.in
@@ -29,16 +29,16 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
-  const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+  const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
+  const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
   const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
-  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+  const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
   $if CHANNEL_TILE == 8:
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
   $else:
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
   do {
     $for K in range(KERNEL_TILE):
       const int8_t* i${K} = input[${K}];
diff --git a/src/qs8-dwconv/unipass-scalar.c.in b/src/qs8-dwconv/unipass-scalar.c.in
index 8881d15..268d5de 100644
--- a/src/qs8-dwconv/unipass-scalar.c.in
+++ b/src/qs8-dwconv/unipass-scalar.c.in
@@ -25,14 +25,14 @@
   assert(channels != 0);
   assert(output_width != 0);
 
-  const int32_t vmultiplier = params->scalar.multiplier;
+  const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
   const int64_t vq31rounding = INT64_C(0x40000000);
-  const int32_t vremainder_mask = params->scalar.remainder_mask;
-  const uint32_t vshift = params->scalar.shift;
-  const int32_t vremainder_threshold = params->scalar.remainder_threshold;
-  const int32_t vout_min = params->scalar.output_min_less_zero_point;
-  const int32_t vout_max = params->scalar.output_max_less_zero_point;
-  const int32_t voutput_zero_point = params->scalar.output_zero_point;
+  const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
+  const uint32_t vshift = params->gemmlowp_scalar.shift;
+  const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
+  const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
+  const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
+  const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
   do {
     $for K in range(KERNEL_TILE):
       const int8_t* i${K} = input[${K}];
diff --git a/src/qs8-dwconv/unipass-sse-mul16.c.in b/src/qs8-dwconv/unipass-sse-mul16.c.in
index 011c123..a367104 100644
--- a/src/qs8-dwconv/unipass-sse-mul16.c.in
+++ b/src/qs8-dwconv/unipass-sse-mul16.c.in
@@ -15,7 +15,7 @@
 #include <xnnpack/dwconv.h>
 
 
-$PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
+$PARAMS_STRUCT = "gemmlowp_sse4" if SSE >= 4 else "gemmlowp_sse2"
 $ISA = "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
 void xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up${CHANNEL_TILE}x${KERNEL_TILE}__${ISA}_mul16(
     size_t channels,
@@ -142,11 +142,11 @@
         __m128i vout${ABC[C:C+8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[C:C+4]}, vacc${ABC[C+4:C+8]}), voutput_zero_point);
 
       $if SSE < 4:
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
         $for C in range(0, CHANNEL_TILE, 8):
           vout${ABC[C:C+8]} = _mm_max_epi16(vout${ABC[C:C+8]}, voutput_min);
 
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
         $for C in range(0, CHANNEL_TILE, 8):
           vout${ABC[C:C+8]} = _mm_min_epi16(vout${ABC[C:C+8]}, voutput_max);
 
@@ -157,14 +157,14 @@
           __m128i vout${ABC[C:C+8]}${ABC[C:C+8]} = _mm_packs_epi16(vout${ABC[C:C+8]}, vout${ABC[C:C+8]});
 
       $if SSE == 4:
-        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
+        const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
         $for C in range(0, CHANNEL_TILE, 16):
           $if C + 8 < CHANNEL_TILE:
             vout${ABC[C:C+16]} = _mm_max_epi8(vout${ABC[C:C+16]}, voutput_min);
           $else:
             vout${ABC[C:C+8]}${ABC[C:C+8]} = _mm_max_epi8(vout${ABC[C:C+8]}${ABC[C:C+8]}, voutput_min);
 
-        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+        const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
         $for C in range(0, CHANNEL_TILE, 16):
           $if C + 8 < CHANNEL_TILE:
             vout${ABC[C:C+16]} = _mm_min_epi8(vout${ABC[C:C+16]}, voutput_max);
@@ -299,14 +299,14 @@
         __m128i vout${ABC[0:8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[0:4]}, vacc${ABC[4:8]}), voutput_zero_point);
 
         $if SSE < 4:
-          vout${ABC[0:8]} = _mm_max_epi16(vout${ABC[0:8]}, _mm_load_si128((const __m128i*) params->sse2.output_min));
-          vout${ABC[0:8]} = _mm_min_epi16(vout${ABC[0:8]}, _mm_load_si128((const __m128i*) params->sse2.output_max));
+          vout${ABC[0:8]} = _mm_max_epi16(vout${ABC[0:8]}, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min));
+          vout${ABC[0:8]} = _mm_min_epi16(vout${ABC[0:8]}, _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max));
 
         __m128i vout${ABC[0:8]}${ABC[0:8]} = _mm_packs_epi16(vout${ABC[0:8]}, vout${ABC[0:8]});
 
         $if SSE == 4:
-          vout${ABC[0:8]}${ABC[0:8]} = _mm_max_epi8(vout${ABC[0:8]}${ABC[0:8]}, _mm_load_si128((const __m128i*) params->sse4.output_min));
-          vout${ABC[0:8]}${ABC[0:8]} = _mm_min_epi8(vout${ABC[0:8]}${ABC[0:8]}, _mm_load_si128((const __m128i*) params->sse4.output_max));
+          vout${ABC[0:8]}${ABC[0:8]} = _mm_max_epi8(vout${ABC[0:8]}${ABC[0:8]}, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+          vout${ABC[0:8]}${ABC[0:8]} = _mm_min_epi8(vout${ABC[0:8]}${ABC[0:8]}, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         $if CHANNEL_TILE > 8:
           if XNN_LIKELY(c >= 8) {
diff --git a/src/qs8-dwconv/unipass-sse-mul32.c.in b/src/qs8-dwconv/unipass-sse-mul32.c.in
index df8f462..82bd994 100644
--- a/src/qs8-dwconv/unipass-sse-mul32.c.in
+++ b/src/qs8-dwconv/unipass-sse-mul32.c.in
@@ -75,8 +75,8 @@
 
       w = (const void*) ((uintptr_t) w + ${CHANNEL_TILE} * sizeof(int32_t) + ${KERNEL_TILE * CHANNEL_TILE} * sizeof(int8_t));
 
-      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-      const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+      const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+      const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
       $for C in range(0, CHANNEL_TILE, 4):
         const __m128i vacc${ABC[C+1:C+4:2]} = _mm_shuffle_epi32(vacc${ABC[C:C+4]}, _MM_SHUFFLE(3, 3, 1, 1));
@@ -90,23 +90,23 @@
       $for C in range(0, CHANNEL_TILE, 4):
         const __m128i vq31prod${ABC[C:C+4]} = _mm_blend_epi16(vq31prod${ABC[C:C+4:2]}, vq31prod${ABC[C+1:C+4:2]}, 0xCC);
 
-      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+      const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
       $for C in range(0, CHANNEL_TILE, 4):
         const __m128i vrem${ABC[C:C+4]} =
           _mm_add_epi32(_mm_and_si128(vq31prod${ABC[C:C+4]}, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod${ABC[C:C+4]}));
 
-      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+      const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
       $for C in range(0, CHANNEL_TILE, 4):
         vacc${ABC[C:C+4]} =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod${ABC[C:C+4]}, vshift), _mm_cmpgt_epi32(vrem${ABC[C:C+4]}, vremainder_threshold));
 
-      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+      const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
       $for C in range(0, CHANNEL_TILE, 8):
         __m128i vout${ABC[C:C+8]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[C:C+4]}, vacc${ABC[C+4:C+8]}), voutput_zero_point);
 
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
       $for C in range(0, CHANNEL_TILE, 16):
         $if C + 8 < CHANNEL_TILE:
           __m128i vout${ABC[C:C+16]} = _mm_packs_epi16(vout${ABC[C:C+8]}, vout${ABC[C+8:C+16]});
@@ -156,8 +156,8 @@
           w = (const void*) ((uintptr_t) w + 4 * sizeof(int32_t));
           k += 4;
 
-        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-        const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+        const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+        const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
         const __m128i vacc${ABC[1:4:2]} = _mm_shuffle_epi32(vacc${ABC[0:4]}, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -169,21 +169,21 @@
 
         const __m128i vq31prod${ABC[0:4]} = _mm_blend_epi16(vq31prod${ABC[0:4:2]}, vq31prod${ABC[1:4:2]}, 0xCC);
 
-        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+        const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
         const __m128i vrem${ABC[0:4]} =
           _mm_add_epi32(_mm_and_si128(vq31prod${ABC[0:4]}, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod${ABC[0:4]}));
 
-        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+        const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
         vacc${ABC[0:4]} =
           _mm_sub_epi32(_mm_sra_epi32(vq31prod${ABC[0:4]}, vshift), _mm_cmpgt_epi32(vrem${ABC[0:4]}, vremainder_threshold));
 
-        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+        const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
         __m128i vout${ABC[0:4]} = _mm_adds_epi16(_mm_packs_epi32(vacc${ABC[0:4]}, vacc${ABC[0:4]}), voutput_zero_point);
 
         vout${ABC[0:4]} = _mm_packs_epi16(vout${ABC[0:4]}, vout${ABC[0:4]});
-        vout${ABC[0:4]} = _mm_max_epi8(vout${ABC[0:4]}, _mm_load_si128((const __m128i*) params->sse4.output_min));
-        vout${ABC[0:4]} = _mm_min_epi8(vout${ABC[0:4]}, _mm_load_si128((const __m128i*) params->sse4.output_max));
+        vout${ABC[0:4]} = _mm_max_epi8(vout${ABC[0:4]}, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+        vout${ABC[0:4]} = _mm_min_epi8(vout${ABC[0:4]}, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
         $if CHANNEL_TILE > 4:
           if XNN_LIKELY(c >= 4) {
diff --git a/src/qs8-dwconv/unipass-wasmsimd-mul16.c.in b/src/qs8-dwconv/unipass-wasmsimd-mul16.c.in
index d32f78b..db5295b 100644
--- a/src/qs8-dwconv/unipass-wasmsimd-mul16.c.in
+++ b/src/qs8-dwconv/unipass-wasmsimd-mul16.c.in
@@ -71,29 +71,29 @@
         const v128_t vacc${ABC[C:C+2]} = wasm_v32x4_shuffle(vacc${ABC[C:C+4]}, vsign${ABC[C:C+4]}, 0, 4, 1, 5);
         const v128_t vacc${ABC[C+2:C+4]} = wasm_v32x4_shuffle(vacc${ABC[C:C+4]}, vsign${ABC[C:C+4]}, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       $for C in range(0, CHANNEL_TILE, 2):
         const v128_t vprod${ABC[C:C+2]} = wasm_i64x2_add(wasm_i64x2_mul(vacc${ABC[C:C+2]}, vmultiplier), vrounding);
 
       $for C in range(0, CHANNEL_TILE, 4):
         const v128_t vq31prod${ABC[C:C+4]} = wasm_v32x4_shuffle(vprod${ABC[C:C+2]}, vprod${ABC[C+2:C+4]}, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       $for C in range(0, CHANNEL_TILE, 4):
         const v128_t vrem${ABC[C:C+4]} = wasm_i32x4_add(wasm_v128_and(vq31prod${ABC[C:C+4]}, vremainder_mask), wasm_i32x4_shr(vq31prod${ABC[C:C+4]}, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       $for C in range(0, CHANNEL_TILE, 4):
         vacc${ABC[C:C+4]} = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod${ABC[C:C+4]}, vshift), wasm_i32x4_gt(vrem${ABC[C:C+4]}, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       $for C in range(0, CHANNEL_TILE, 8):
         v128_t vout${ABC[C:C+8]} = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc${ABC[C:C+4]}, vacc${ABC[C+4:C+8]}), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       $for C in range(0, CHANNEL_TILE, 16):
         $if C + 8 < CHANNEL_TILE:
           v128_t vout${ABC[C:C+16]} = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout${ABC[C:C+8]}, vout${ABC[C+8:C+16]}), voutput_min), voutput_max);
@@ -148,8 +148,8 @@
       const v128_t vacc${ABC[4:6]} = wasm_v32x4_shuffle(vacc${ABC[4:8]}, vsign${ABC[4:8]}, 0, 4, 1, 5);
       const v128_t vacc${ABC[6:8]} = wasm_v32x4_shuffle(vacc${ABC[4:8]}, vsign${ABC[4:8]}, 2, 6, 3, 7);
 
-      const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-      const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+      const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+      const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
       const v128_t vprod${ABC[0:2]} = wasm_i64x2_add(wasm_i64x2_mul(vacc${ABC[0:2]}, vmultiplier), vrounding);
       const v128_t vprod${ABC[2:4]} = wasm_i64x2_add(wasm_i64x2_mul(vacc${ABC[2:4]}, vmultiplier), vrounding);
       const v128_t vprod${ABC[4:6]} = wasm_i64x2_add(wasm_i64x2_mul(vacc${ABC[4:6]}, vmultiplier), vrounding);
@@ -158,20 +158,20 @@
       const v128_t vq31prod${ABC[0:4]} = wasm_v32x4_shuffle(vprod${ABC[0:2]}, vprod${ABC[2:4]}, 1, 3, 5, 7);
       const v128_t vq31prod${ABC[4:8]} = wasm_v32x4_shuffle(vprod${ABC[4:6]}, vprod${ABC[6:8]}, 1, 3, 5, 7);
 
-      const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+      const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
       const v128_t vrem${ABC[0:4]} = wasm_i32x4_add(wasm_v128_and(vq31prod${ABC[0:4]}, vremainder_mask), wasm_i32x4_shr(vq31prod${ABC[0:4]}, 31));
       const v128_t vrem${ABC[4:8]} = wasm_i32x4_add(wasm_v128_and(vq31prod${ABC[4:8]}, vremainder_mask), wasm_i32x4_shr(vq31prod${ABC[4:8]}, 31));
 
-      const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-      const int32_t vshift = params->wasmsimd.shift;
+      const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+      const int32_t vshift = params->gemmlowp_wasmsimd.shift;
       vacc${ABC[0:4]} = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod${ABC[0:4]}, vshift), wasm_i32x4_gt(vrem${ABC[0:4]}, vthreshold));
       vacc${ABC[4:8]} = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod${ABC[4:8]}, vshift), wasm_i32x4_gt(vrem${ABC[4:8]}, vthreshold));
 
-      const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+      const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
       v128_t vout${ABC[0:8]} = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc${ABC[0:4]}, vacc${ABC[4:8]}), voutput_zero_point);
 
-      const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
-      const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+      const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
+      const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
       v128_t vout${ABC[0:8]}${ABC[0:8]} = wasm_i8x16_min(wasm_i8x16_max(wasm_i8x16_narrow_i16x8(vout${ABC[0:8]}, vout${ABC[0:8]}), voutput_min), voutput_max);
 
       $if CHANNEL_TILE > 8:
diff --git a/src/qs8-gemm/MRx16c8-avx512skx.c.in b/src/qs8-gemm/MRx16c8-avx512skx.c.in
index 9b842bc..a2d725d 100644
--- a/src/qs8-gemm/MRx16c8-avx512skx.c.in
+++ b/src/qs8-gemm/MRx16c8-avx512skx.c.in
@@ -61,24 +61,24 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
   $if MR > 1:
-    const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
+    const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
   $else:
-    const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
+    const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
   $if MR > 2:
-    const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_min));
-    const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_max));
+    const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   $elif MR == 2:
-    const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_min));
-    const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_max));
+    const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   $else:
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     $for N in range(4, 16, 4):
diff --git a/src/qs8-gemm/MRx4c2-sse.c.in b/src/qs8-gemm/MRx4c2-sse.c.in
index 77306ed..cdd7e71 100644
--- a/src/qs8-gemm/MRx4c2-sse.c.in
+++ b/src/qs8-gemm/MRx4c2-sse.c.in
@@ -27,7 +27,7 @@
 
 $LOAD_SUFFIX = {"LD128": "_ld128", "LD64": "_ld64", "EXTENDED": ""}[VARIANT]
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-$PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
+$PARAMS_STRUCT = "gemmlowp_sse4" if SSE >= 4 else "gemmlowp_sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
 void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x4c2__${ISA}${LOAD_SUFFIX}(
     size_t mr,
@@ -297,8 +297,8 @@
       __m128i vacc${M}${min(M+1, MR-1)}x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc${M}x0123, vacc${min(M+1, MR-1)}x0123), voutput_zero_point);
 
     $if SSE < 4:
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       $for M in range(0, MR, 2):
         vacc${M}${min(M+1, MR-1)}x0123 = _mm_min_epi16(_mm_max_epi16(vacc${M}${min(M+1, MR-1)}x0123, voutput_min), voutput_max);
 
@@ -308,8 +308,8 @@
       __m128i vout = _mm_packs_epi16(vacc0${min(1, MR-1)}x0123, vacc0${min(1, MR-1)}x0123);
 
     $if SSE == 4:
-      vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-      vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+      vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+      vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/MRx4c8-sse.c.in b/src/qs8-gemm/MRx4c8-sse.c.in
index 656f725..6f26c05 100644
--- a/src/qs8-gemm/MRx4c8-sse.c.in
+++ b/src/qs8-gemm/MRx4c8-sse.c.in
@@ -27,7 +27,7 @@
 
 $LOAD_SUFFIX = {"LD128": "_ld128", "LD64": "_ld64", "EXTENDED": ""}[VARIANT]
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-$PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
+$PARAMS_STRUCT = "gemmlowp_sse4" if SSE >= 4 else "gemmlowp_sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
 void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x4c8__${ISA}${LOAD_SUFFIX}(
     size_t mr,
@@ -234,8 +234,8 @@
       __m128i vacc${M}${min(M+1, MR-1)}x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc${M}x0123, vacc${min(M+1, MR-1)}x0123), voutput_zero_point);
 
     $if SSE < 4:
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->${PARAMS_STRUCT}.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->${PARAMS_STRUCT}.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       $for M in range(0, MR, 2):
         vacc${M}${min(M+1, MR-1)}x0123 = _mm_min_epi16(_mm_max_epi16(vacc${M}${min(M+1, MR-1)}x0123, voutput_min), voutput_max);
 
@@ -245,8 +245,8 @@
       __m128i vout = _mm_packs_epi16(vacc0${min(1, MR-1)}x0123, vacc0${min(1, MR-1)}x0123);
 
     $if SSE == 4:
-      vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-      vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+      vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+      vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/MRx4c8-wasmsimd.c.in b/src/qs8-gemm/MRx4c8-wasmsimd.c.in
index c91c1cc..14ec68d 100644
--- a/src/qs8-gemm/MRx4c8-wasmsimd.c.in
+++ b/src/qs8-gemm/MRx4c8-wasmsimd.c.in
@@ -131,8 +131,8 @@
     $for M in range(MR):
       const v128_t vacc${M}x01 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     $for M in range(MR):
       const v128_t vprod${M}x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc${M}x01, vmultiplier), vrounding);
       const v128_t vacc${M}x23 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 2, 6, 3, 7);
@@ -143,16 +143,16 @@
     $for M in range(MR):
       const v128_t vq31prod${M}x0123 = wasm_v32x4_shuffle(vprod${M}x01, vprod${M}x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     $for M in range(MR):
       const v128_t vrem${M}x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod${M}x0123, vremainder_mask), wasm_i32x4_lt(vq31prod${M}x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     $for M in range(MR):
       vacc${M}x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod${M}x0123, vshift), wasm_i32x4_gt(vrem${M}x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     $for M in range(0, MR, 2):
       v128_t vacc${M}${min(M+1, MR-1)}x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc${M}x0123, vacc${min(M+1, MR-1)}x0123), voutput_zero_point);
 
@@ -161,10 +161,10 @@
     $else:
       v128_t vout = wasm_i8x16_narrow_i16x8(vacc0${min(1, MR-1)}x0123, vacc0${min(1, MR-1)}x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/MRx8c8-avx2.c.in b/src/qs8-gemm/MRx8c8-avx2.c.in
index 761da0f..f92a9aa 100644
--- a/src/qs8-gemm/MRx8c8-avx2.c.in
+++ b/src/qs8-gemm/MRx8c8-avx2.c.in
@@ -3,6 +3,7 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
+$assert REQUANTIZATION in ["GEMMLOWP", "FP32"]
 $assert VARIANT in ["LD128", "EXTENDED"]
 $assert MR <= 4
 #include <assert.h>
@@ -15,7 +16,7 @@
 
 
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x8c8__avx2(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_${REQUANTIZATION.lower()}_ukernel_${MR}x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
@@ -112,40 +113,51 @@
     $for M in range(MR):
       __m256i vacc${M}x01234567 = _mm256_permutevar8x32_epi32(vacc${M}x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    $if REQUANTIZATION == "GEMMLOWP":
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
-    $for M in range(MR):
-      const __m256i vacc${M}x11335577 = _mm256_srli_epi64(vacc${M}x01234567, 32);
+      $for M in range(MR):
+        const __m256i vacc${M}x11335577 = _mm256_srli_epi64(vacc${M}x01234567, 32);
 
-    $for M in range(MR):
-      const __m256i vprod${M}x0246 = _mm256_add_epi64(_mm256_mul_epi32(vacc${M}x01234567, vmultiplier), vrounding);
+      $for M in range(MR):
+        const __m256i vprod${M}x0246 = _mm256_add_epi64(_mm256_mul_epi32(vacc${M}x01234567, vmultiplier), vrounding);
 
-    $for M in range(MR):
-      const __m256i vprod${M}x1357 = _mm256_add_epi64(_mm256_mul_epi32(vacc${M}x11335577, vmultiplier), vrounding);
+      $for M in range(MR):
+        const __m256i vprod${M}x1357 = _mm256_add_epi64(_mm256_mul_epi32(vacc${M}x11335577, vmultiplier), vrounding);
 
-    $for M in range(MR):
-      const __m256i vq31prod${M}x0246 = _mm256_srli_epi64(vprod${M}x0246, 31);
-      const __m256i vq31prod${M}x1357 = _mm256_add_epi64(vprod${M}x1357, vprod${M}x1357);
+      $for M in range(MR):
+        const __m256i vq31prod${M}x0246 = _mm256_srli_epi64(vprod${M}x0246, 31);
+        const __m256i vq31prod${M}x1357 = _mm256_add_epi64(vprod${M}x1357, vprod${M}x1357);
 
-    $for M in range(MR):
-      const __m256i vq31prod${M}x01234567 = _mm256_blend_epi16(vq31prod${M}x0246, vq31prod${M}x1357, 0xCC);
+      $for M in range(MR):
+        const __m256i vq31prod${M}x01234567 = _mm256_blend_epi16(vq31prod${M}x0246, vq31prod${M}x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
-    $for M in range(MR):
-      const __m256i vrem${M}x01234567 =
-        _mm256_add_epi32(_mm256_and_si256(vq31prod${M}x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${M}x01234567));
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
+      $for M in range(MR):
+        const __m256i vrem${M}x01234567 =
+          _mm256_add_epi32(_mm256_and_si256(vq31prod${M}x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${M}x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    $if M > 1:
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
-    $else:
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
-    $for M in range(MR):
-      vacc${M}x01234567 =
-        _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${M}x01234567, vshift), _mm256_cmpgt_epi32(vrem${M}x01234567, vremainder_threshold));
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      $if M > 1:
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
+      $else:
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
+      $for M in range(MR):
+        vacc${M}x01234567 =
+          _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${M}x01234567, vshift), _mm256_cmpgt_epi32(vrem${M}x01234567, vremainder_threshold));
+    $elif REQUANTIZATION == "FP32":
+      $for M in range(MR):
+        __m256 vscaled${M}x01234567 = _mm256_cvtepi32_ps(vacc${M}x01234567);
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      $for M in range(MR):
+        vscaled${M}x01234567 = _mm256_mul_ps(vscaled${M}x01234567, vscale);
+
+      $for M in range(MR):
+        vacc${M}x01234567 = _mm256_cvtps_epi32(vscaled${M}x01234567);
+
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->${REQUANTIZATION.lower()}_avx2.output_zero_point);
     $for M in range(0, MR, 2):
       __m256i vacc${M}${min(M+1, MR-1)}x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc${M}x01234567, vacc${min(M+1, MR-1)}x01234567), voutput_zero_point);
 
@@ -157,8 +169,8 @@
     $else:
       __m256i vout = _mm256_packs_epi16(vacc0${min(1, MR-1)}x01234567, vacc0${min(1, MR-1)}x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->${REQUANTIZATION.lower()}_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->${REQUANTIZATION.lower()}_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-gemm/MRxNRc4-neondot.c.in b/src/qs8-gemm/MRxNRc4-neondot.c.in
index d395508..eb3af38 100644
--- a/src/qs8-gemm/MRxNRc4-neondot.c.in
+++ b/src/qs8-gemm/MRxNRc4-neondot.c.in
@@ -105,12 +105,12 @@
     }
 
     // Post-accumulation work
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
 
     $for M in range(MR):
       $for N in range(0, NR, 4):
-        const int32x4_t vproduct${M}x${ABC[N:N+4]} = vqrdmulhq_n_s32(vacc${M}x${ABC[N:N+4]}, params->neon.multiplier);
+        const int32x4_t vproduct${M}x${ABC[N:N+4]} = vqrdmulhq_n_s32(vacc${M}x${ABC[N:N+4]}, params->gemmlowp_neon.multiplier);
 
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -120,7 +120,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -149,11 +149,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in range(MR):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-gemm/c16-neon-mlal-padal.c.in b/src/qs8-gemm/c16-neon-mlal-padal.c.in
index 7392300..ebfd029 100644
--- a/src/qs8-gemm/c16-neon-mlal-padal.c.in
+++ b/src/qs8-gemm/c16-neon-mlal-padal.c.in
@@ -104,12 +104,12 @@
         int32x4_t vacc${M}x${ABC[N:N+4]} = vcombine_s32(vsum${M}x${ABC[N:N+2]}, vsum${M}x${ABC[N+2:N+4]} );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -119,7 +119,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -147,11 +147,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in range(MR):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-gemm/c2-neon-mull-padal-dup.c.in b/src/qs8-gemm/c2-neon-mull-padal-dup.c.in
index 2914567..54e9881 100644
--- a/src/qs8-gemm/c2-neon-mull-padal-dup.c.in
+++ b/src/qs8-gemm/c2-neon-mull-padal-dup.c.in
@@ -139,12 +139,12 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -154,7 +154,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -183,11 +183,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in range(MR):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-gemm/c8-neon-mull-padal.c.in b/src/qs8-gemm/c8-neon-mull-padal.c.in
index da184fb..e704355 100644
--- a/src/qs8-gemm/c8-neon-mull-padal.c.in
+++ b/src/qs8-gemm/c8-neon-mull-padal.c.in
@@ -122,12 +122,12 @@
         int32x4_t vacc${M}x${ABC[N:N+4]} = vcombine_s32(vsum${M}x${ABC[N:N+2]}, vsum${M}x${ABC[N+2:N+4]} );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -137,7 +137,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -165,11 +165,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in range(MR):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 231fdae..2a95279 100644
--- a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -224,13 +224,13 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -242,7 +242,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -254,8 +254,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
index c00e5f7..ab31654 100644
--- a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -222,13 +222,13 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -240,7 +240,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -252,8 +252,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
index b721ca4..9e9ed9c 100644
--- a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -217,13 +217,13 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -235,7 +235,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -247,8 +247,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 3a58012..42751c2 100644
--- a/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -176,13 +176,13 @@
     int32x4_t vacc0xCDEF = vcombine_s32(vsum0xCD, vsum0xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -194,7 +194,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -205,8 +205,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index f76e520..465927a 100644
--- a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -242,13 +242,13 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -260,7 +260,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -272,8 +272,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index df0e6fb..50e5fc7 100644
--- a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -154,13 +154,13 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -172,7 +172,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -184,8 +184,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c
index 1d8d2aa..f0bcc60 100644
--- a/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c
@@ -97,13 +97,13 @@
     }
 
     // Post-accumulation work
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
 
-    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->neon.multiplier);
-    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->neon.multiplier);
-    const int32x4_t vproduct0x89AB = vqrdmulhq_n_s32(vacc0x89AB, params->neon.multiplier);
-    const int32x4_t vproduct0xCDEF = vqrdmulhq_n_s32(vacc0xCDEF, params->neon.multiplier);
+    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x89AB = vqrdmulhq_n_s32(vacc0x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0xCDEF = vqrdmulhq_n_s32(vacc0xCDEF, params->gemmlowp_neon.multiplier);
 
     vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -115,7 +115,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -127,8 +127,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
index a79b986..9eb5ad8 100644
--- a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
@@ -43,14 +43,14 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
-  const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
-  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
+  const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
+  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     __m512i vacc0x4567 = _mm512_maskz_expandloadu_epi32(vbias_mask, (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
diff --git a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
index ff36aa9..3609135 100644
--- a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -249,13 +249,13 @@
     int32x4_t vacc0xCDEF = vcombine_s32(vsum0xCD, vsum0xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -267,7 +267,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -278,8 +278,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
index ed1d96d..e1c439d 100644
--- a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -160,13 +160,13 @@
     int32x4_t vacc0xCDEF = vcombine_s32(vsum0xCD, vsum0xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -178,7 +178,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -189,8 +189,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c
index 65fa4d8..7a91fad 100644
--- a/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c
@@ -52,7 +52,7 @@
       k -= sizeof(int8_t);
     } while (k != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
 
@@ -60,24 +60,24 @@
     const int32_t vq31product0x0 = (int32_t) (uint32_t) ((uint64_t) (vproduct0x0 + vq31rounding) >> 31);
     const int32_t vq31product0x1 = (int32_t) (uint32_t) ((uint64_t) (vproduct0x1 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
 
diff --git a/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c
index dbd2cd8..31866f0 100644
--- a/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c
@@ -58,7 +58,7 @@
       k -= sizeof(int8_t);
     } while (k != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct0x2 = (int64_t) vacc0x2 * (int64_t) vmultiplier;
@@ -70,32 +70,32 @@
     const int32_t vq31product0x2 = (int32_t) (uint32_t) ((uint64_t) (vproduct0x2 + vq31rounding) >> 31);
     const int32_t vq31product0x3 = (int32_t) (uint32_t) ((uint64_t) (vproduct0x3 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder0x2 = (vq31product0x2 & vremainder_mask) - (int32_t) (vq31product0x2 < 0);
     const int32_t vremainder0x3 = (vq31product0x3 & vremainder_mask) - (int32_t) (vq31product0x3 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout0x2 = asr_s32(vq31product0x2, vshift) + (int32_t) (vremainder0x2 > vremainder_threshold);
     int32_t vout0x3 = asr_s32(vq31product0x3, vshift) + (int32_t) (vremainder0x3 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout0x2 = math_max_s32(vout0x2, vout_min);
     vout0x3 = math_max_s32(vout0x3, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout0x2 = math_min_s32(vout0x2, vout_max);
     vout0x3 = math_min_s32(vout0x3, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout0x2 += voutput_zero_point;
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
index abdb94d..427cb9f 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
@@ -105,8 +105,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -119,23 +119,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
index 4acc0f6..b1c30ad 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
@@ -105,8 +105,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -119,23 +119,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
index 1ae6dca..1836237 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -105,8 +105,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -135,20 +135,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
index ad654f3..4d16fd0 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -105,8 +105,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -135,20 +135,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
index e630e68..e97a400 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -105,8 +105,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -119,23 +119,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
index 3360b16..278ad8f 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -105,8 +105,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -119,23 +119,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
index 9eae431..f9cb7c7 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -105,8 +105,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -135,20 +135,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
index 314f342..6e7a449 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -105,8 +105,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -135,20 +135,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
index 5ef3e18..21adf15 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
@@ -110,8 +110,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -124,23 +124,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
index fec0dce..0b07fc7 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
@@ -110,8 +110,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -124,23 +124,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c
index cfa7d99..3fdd41b 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c
@@ -98,8 +98,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -112,23 +112,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c
index 18399ef..37bf918 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c
@@ -98,8 +98,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -128,20 +128,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c
index 7895ded..ca397ec 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c
@@ -98,8 +98,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -112,23 +112,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c
index 875056a..946b93d 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -98,8 +98,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -128,20 +128,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c
index 1b212df..543f507 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c
@@ -103,8 +103,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -117,23 +117,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
index fc4d147..993b2e4 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
@@ -77,8 +77,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -91,23 +91,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
index 95e0802..c6f6930 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
@@ -79,8 +79,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -93,23 +93,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
index 414f82d..182a674 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -77,8 +77,8 @@
 
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -107,20 +107,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
index bd8a13a..3418021 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -79,8 +79,8 @@
 
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -109,20 +109,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
index 6aaef3f..8d72bbb 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -77,8 +77,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -91,23 +91,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
index 6ff1e07..e63746f 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -79,8 +79,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -93,23 +93,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
index a885ec0..a54757a 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -77,8 +77,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -107,20 +107,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
index c502eb5..bf5e58a 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -79,8 +79,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -109,20 +109,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index c4af47a..a452a7c 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -91,8 +91,8 @@
 
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
 
@@ -100,22 +100,22 @@
 
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc00x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc0x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc00x0123, vacc00x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index f97e401..1566b89 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -87,8 +87,8 @@
 
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
 
@@ -96,22 +96,22 @@
 
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc00x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc0x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc00x0123, vacc00x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
index 26310c8..0809765 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
@@ -82,8 +82,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -96,23 +96,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
index 393e07d..f6399da 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
@@ -84,8 +84,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -98,23 +98,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c
index ec0c611..3175475 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c
@@ -75,8 +75,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -89,23 +89,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c
index 863f22e..ddd443e 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c
@@ -75,8 +75,8 @@
 
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -105,20 +105,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c
index 23dee4c..1e07d0d 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c
@@ -75,8 +75,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -89,23 +89,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c
index 3ba2f0d..04ec0c9 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c
@@ -75,8 +75,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -105,20 +105,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c
index be88364..a093a7e 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c
@@ -87,8 +87,8 @@
 
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
 
@@ -96,22 +96,22 @@
 
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc00x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc0x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc00x0123, vacc00x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c
index 8766948..674b44b 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c
@@ -80,8 +80,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -94,23 +94,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 7329a94..3ad4613 100644
--- a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -153,11 +153,11 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -165,7 +165,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -175,8 +175,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
index f3a68a8..62da1a8 100644
--- a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -152,11 +152,11 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -164,7 +164,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -174,8 +174,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
index a5a8fd6..faf07de 100644
--- a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -147,11 +147,11 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -159,7 +159,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -169,8 +169,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 6e110e0..4fbc6cf 100644
--- a/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -116,11 +116,11 @@
     int32x4_t vacc0x4567 = vcombine_s32(vsum0x45, vsum0x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -128,7 +128,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
@@ -137,8 +137,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 1944ffe..86a22df 100644
--- a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -158,11 +158,11 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -170,7 +170,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -180,8 +180,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 822f923..f73c463 100644
--- a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -110,11 +110,11 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -122,7 +122,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -132,8 +132,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c
index 3e12c01..bec1770 100644
--- a/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c
@@ -83,11 +83,11 @@
     }
 
     // Post-accumulation work
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
 
-    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->neon.multiplier);
-    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->neon.multiplier);
+    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->gemmlowp_neon.multiplier);
 
     vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -95,7 +95,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -105,8 +105,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-fp32-avx2.c b/src/qs8-gemm/gen/1x8c8-minmax-fp32-avx2.c
new file mode 100644
index 0000000..9c1b57c
--- /dev/null
+++ b/src/qs8-gemm/gen/1x8c8-minmax-fp32-avx2.c
@@ -0,0 +1,146 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-gemm/MRx8c8-avx2.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/gemm.h>
+#include <xnnpack/intrinsics-polyfill.h>
+#include <xnnpack/math.h>
+
+
+void xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2(
+    size_t mr,
+    size_t nc,
+    size_t kc,
+    const int8_t* restrict a,
+    size_t a_stride,
+    const void* restrict w,
+    int8_t* restrict c,
+    size_t cm_stride,
+    size_t cn_stride,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN XNN_DISABLE_MSAN
+{
+  assert(mr != 0);
+  assert(mr <= 1);
+  assert(nc != 0);
+  assert(kc != 0);
+  assert(kc % sizeof(int8_t) == 0);
+  assert(a != NULL);
+  assert(w != NULL);
+  assert(c != NULL);
+
+  kc = round_up_po2(kc, 8);
+  const int8_t* a0 = a;
+  int8_t* c0 = c;
+
+  do {
+    const __m128i vbias0x0 = _mm_loadu_si32(w);
+    const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t)));
+    __m256i vacc0x01 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x0), vbias0x1, 1);
+    const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t)));
+    const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t)));
+    __m256i vacc0x23 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x2), vbias0x3, 1);
+    const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
+    const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t)));
+    __m256i vacc0x45 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x4), vbias0x5, 1);
+    const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t)));
+    const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t)));
+    __m256i vacc0x67 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x6), vbias0x7, 1);
+    w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+
+    size_t k = 0;
+    while (k < kc) {
+      const __m128i va0 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a0));
+      const __m256i vxa0 = _mm256_cvtepi8_epi16(va0);
+      a0 += 8;
+
+      const __m128i vb01 = _mm_load_si128((const __m128i*) w);
+      const __m256i vxb01 = _mm256_cvtepi8_epi16(vb01);
+
+      vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01));
+      const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t)));
+      const __m256i vxb23 = _mm256_cvtepi8_epi16(vb23);
+
+      vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23));
+      const __m128i vb45 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int8_t)));
+      const __m256i vxb45 = _mm256_cvtepi8_epi16(vb45);
+
+      vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45));
+      const __m128i vb67 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 48 * sizeof(int8_t)));
+      const __m256i vxb67 = _mm256_cvtepi8_epi16(vb67);
+
+      vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67));
+
+      w = (const void*) ((uintptr_t) w + 64 * sizeof(int8_t));
+      k += 8 * sizeof(int8_t);
+    }
+
+    const __m256i vacc0x0213 = _mm256_hadd_epi32(vacc0x01, vacc0x23);
+    const __m256i vacc0x4657 = _mm256_hadd_epi32(vacc0x45, vacc0x67);
+
+    const __m256i vacc0x02461357 = _mm256_hadd_epi32(vacc0x0213, vacc0x4657);
+
+    const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+    __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
+
+    __m256 vscaled0x01234567 = _mm256_cvtepi32_ps(vacc0x01234567);
+
+    const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+    vscaled0x01234567 = _mm256_mul_ps(vscaled0x01234567, vscale);
+
+    vacc0x01234567 = _mm256_cvtps_epi32(vscaled0x01234567);
+
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+    __m256i vacc00x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc0x01234567), voutput_zero_point);
+
+    vacc00x01234567 = _mm256_permute4x64_epi64(vacc00x01234567, _MM_SHUFFLE(3, 1, 2, 0));
+
+    __m256i vout = _mm256_packs_epi16(vacc00x01234567, vacc00x01234567);
+
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_max));
+
+    __m128i vout_lo = _mm256_castsi256_si128(vout);
+    __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
+
+    if (nc >= 8) {
+      _mm_storel_epi64((__m128i*) c0, vout_lo);
+
+      c0 = (int8_t*) ((uintptr_t) c0 + cn_stride);
+
+      a0 = (const int8_t*) ((uintptr_t) a0 - kc);
+
+      nc -= 8;
+    } else {
+      if (nc & 4) {
+        _mm_storeu_si32(c0, vout_lo);
+
+        c0 += 4;
+
+        vout_lo = _mm_srli_epi64(vout_lo, 32);
+        vout_hi = _mm_srli_epi64(vout_hi, 32);
+      }
+      if (nc & 2) {
+        *((uint16_t*) c0) = (uint16_t) _mm_extract_epi16(vout_lo, 0);
+
+        c0 += 2;
+
+        vout_lo = _mm_srli_epi32(vout_lo, 16);
+        vout_hi = _mm_srli_epi32(vout_hi, 16);
+      }
+      if (nc & 1) {
+        *c0 = (int8_t) _mm_extract_epi8(vout_lo, 0);
+      }
+
+      nc = 0;
+    }
+  } while (nc != 0);
+}
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
index 17dcb23..e1761aa 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
@@ -91,8 +91,8 @@
     const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
     __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
     const __m256i vacc0x11335577 = _mm256_srli_epi64(vacc0x01234567, 32);
 
@@ -105,24 +105,24 @@
 
     const __m256i vq31prod0x01234567 = _mm256_blend_epi16(vq31prod0x0246, vq31prod0x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
     const __m256i vrem0x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod0x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod0x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
     vacc0x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, vremainder_threshold));
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
     __m256i vacc00x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc0x01234567), voutput_zero_point);
 
     vacc00x01234567 = _mm256_permute4x64_epi64(vacc00x01234567, _MM_SHUFFLE(3, 1, 2, 0));
 
     __m256i vout = _mm256_packs_epi16(vacc00x01234567, vacc00x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
index f8783fb..85b5d1b 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -157,11 +157,11 @@
     int32x4_t vacc0x4567 = vcombine_s32(vsum0x45, vsum0x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -169,7 +169,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
@@ -178,8 +178,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
index 1c010f1..890cecf 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -108,11 +108,11 @@
     int32x4_t vacc0x4567 = vcombine_s32(vsum0x45, vsum0x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -120,7 +120,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
@@ -129,8 +129,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
index 622083f..be4ec37 100644
--- a/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
@@ -87,8 +87,8 @@
     const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
     __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
     const __m256i vacc0x11335577 = _mm256_srli_epi64(vacc0x01234567, 32);
 
@@ -101,24 +101,24 @@
 
     const __m256i vq31prod0x01234567 = _mm256_blend_epi16(vq31prod0x0246, vq31prod0x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
     const __m256i vrem0x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod0x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod0x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
     vacc0x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, vremainder_threshold));
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
     __m256i vacc00x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc0x01234567), voutput_zero_point);
 
     vacc00x01234567 = _mm256_permute4x64_epi64(vacc00x01234567, _MM_SHUFFLE(3, 1, 2, 0));
 
     __m256i vout = _mm256_packs_epi16(vacc00x01234567, vacc00x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 1c940b0..45cb137 100644
--- a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -298,7 +298,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -308,7 +308,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -328,7 +328,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -346,8 +346,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
index 7e645ff..2b529c9 100644
--- a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -296,7 +296,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -306,7 +306,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -326,7 +326,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -344,8 +344,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 4848f85..c51a42b 100644
--- a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -319,7 +319,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -329,7 +329,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -349,7 +349,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -367,8 +367,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
index b7b984a..7409287 100644
--- a/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -287,7 +287,7 @@
     int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -297,7 +297,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -317,7 +317,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -334,8 +334,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index afae933..209dd15 100644
--- a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -360,7 +360,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -370,7 +370,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -390,7 +390,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -408,8 +408,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 9e1bac1..34e8361 100644
--- a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -222,7 +222,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -232,7 +232,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -252,7 +252,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -270,8 +270,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
index 5b4ea8c..447dc07 100644
--- a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
@@ -49,14 +49,14 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
-  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
-  const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_min));
-  const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
+  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
+  const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+  const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     __m512i vacc0x4567 = _mm512_maskz_expandloadu_epi32(vbias_mask, (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
diff --git a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
index e0b9794..716c0c3 100644
--- a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -394,7 +394,7 @@
     int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -404,7 +404,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -424,7 +424,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -441,8 +441,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
index 4d865b4..dbdaecc 100644
--- a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -255,7 +255,7 @@
     int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -265,7 +265,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -285,7 +285,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -302,8 +302,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c
index 93d7387..9f1c433 100644
--- a/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c
@@ -63,7 +63,7 @@
       k -= sizeof(int8_t);
     } while (k != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct1x0 = (int64_t) vacc1x0 * (int64_t) vmultiplier;
@@ -75,32 +75,32 @@
     const int32_t vq31product1x0 = (int32_t) (uint32_t) ((uint64_t) (vproduct1x0 + vq31rounding) >> 31);
     const int32_t vq31product1x1 = (int32_t) (uint32_t) ((uint64_t) (vproduct1x1 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder1x0 = (vq31product1x0 & vremainder_mask) - (int32_t) (vq31product1x0 < 0);
     const int32_t vremainder1x1 = (vq31product1x1 & vremainder_mask) - (int32_t) (vq31product1x1 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout1x0 = asr_s32(vq31product1x0, vshift) + (int32_t) (vremainder1x0 > vremainder_threshold);
     int32_t vout1x1 = asr_s32(vq31product1x1, vshift) + (int32_t) (vremainder1x1 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout1x0 = math_max_s32(vout1x0, vout_min);
     vout1x1 = math_max_s32(vout1x1, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout1x0 = math_min_s32(vout1x0, vout_max);
     vout1x1 = math_min_s32(vout1x1, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout1x0 += voutput_zero_point;
diff --git a/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c
index 7f67584..29d8508 100644
--- a/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c
@@ -73,7 +73,7 @@
       k -= sizeof(int8_t);
     } while (k != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct0x2 = (int64_t) vacc0x2 * (int64_t) vmultiplier;
@@ -93,7 +93,7 @@
     const int32_t vq31product1x2 = (int32_t) (uint32_t) ((uint64_t) (vproduct1x2 + vq31rounding) >> 31);
     const int32_t vq31product1x3 = (int32_t) (uint32_t) ((uint64_t) (vproduct1x3 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder0x2 = (vq31product0x2 & vremainder_mask) - (int32_t) (vq31product0x2 < 0);
@@ -103,8 +103,8 @@
     const int32_t vremainder1x2 = (vq31product1x2 & vremainder_mask) - (int32_t) (vq31product1x2 < 0);
     const int32_t vremainder1x3 = (vq31product1x3 & vremainder_mask) - (int32_t) (vq31product1x3 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout0x2 = asr_s32(vq31product0x2, vshift) + (int32_t) (vremainder0x2 > vremainder_threshold);
@@ -114,7 +114,7 @@
     int32_t vout1x2 = asr_s32(vq31product1x2, vshift) + (int32_t) (vremainder1x2 > vremainder_threshold);
     int32_t vout1x3 = asr_s32(vq31product1x3, vshift) + (int32_t) (vremainder1x3 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout0x2 = math_max_s32(vout0x2, vout_min);
@@ -124,7 +124,7 @@
     vout1x2 = math_max_s32(vout1x2, vout_min);
     vout1x3 = math_max_s32(vout1x3, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout0x2 = math_min_s32(vout0x2, vout_max);
@@ -134,7 +134,7 @@
     vout1x2 = math_min_s32(vout1x2, vout_max);
     vout1x3 = math_min_s32(vout1x3, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout0x2 += voutput_zero_point;
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
index 5c8c65f..de0580f 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
@@ -132,8 +132,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -152,27 +152,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
index 73d5422..4213ba5 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
@@ -132,8 +132,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -152,27 +152,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
index b5be8c7..3314380 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -132,8 +132,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -176,24 +176,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
index 453aceb..3e0a980 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -132,8 +132,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -176,24 +176,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
index 1c31e16..e59f622 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -132,8 +132,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -152,27 +152,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
index b75bcde..5bbf9c3 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -132,8 +132,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -152,27 +152,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
index 3b732b8..a1eb413 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -132,8 +132,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -176,24 +176,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
index f177ef9..d917ca9 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -132,8 +132,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -176,24 +176,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
index 6f3dae5..42fec23 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
@@ -137,8 +137,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -157,27 +157,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
index 04f48d1..f225098 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
@@ -137,8 +137,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -157,27 +157,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c
index 6d70934..1f998e1 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c
@@ -125,8 +125,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -145,27 +145,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c
index f040a7b..ea6a894 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c
@@ -125,8 +125,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -169,24 +169,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c
index 6d6fd5a..ff5840a 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c
@@ -125,8 +125,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -145,27 +145,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c
index 8f67268..895b0cf 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -125,8 +125,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -169,24 +169,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c
index cf0622b..4bc8c55 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c
@@ -130,8 +130,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -150,27 +150,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
index 26c20db..df5de10 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
@@ -97,8 +97,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -117,27 +117,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
index 9549f20..ae85181 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
@@ -99,8 +99,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -119,27 +119,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
index 6afe7f8..85f2c10 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -97,8 +97,8 @@
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -141,24 +141,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
index aa2c3f8..9813058 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -99,8 +99,8 @@
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -143,24 +143,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
index 063b30a..1691be1 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -97,8 +97,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -117,27 +117,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
index 086af13..c619ae7 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -99,8 +99,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -119,27 +119,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
index 663f448..bd7bd88 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -97,8 +97,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -141,24 +141,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
index 8e5eca9..2928523 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -99,8 +99,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -143,24 +143,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index eccd1b3..085e9a9 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -120,8 +120,8 @@
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -133,24 +133,24 @@
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc01x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 35141ec..83fdd00 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -116,8 +116,8 @@
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -129,24 +129,24 @@
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc01x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
index d1d91f5..6903e30 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
@@ -102,8 +102,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -122,27 +122,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
index 42f3678..c38e481 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
@@ -104,8 +104,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -124,27 +124,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c
index f4f8c0b..86f9c1d 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c
@@ -95,8 +95,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -115,27 +115,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c
index 344df2a..8b67fcd 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c
@@ -95,8 +95,8 @@
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -139,24 +139,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c
index 5afe66e..ea4d124 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c
@@ -95,8 +95,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -115,27 +115,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c
index 5985c84..54d9ab4 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c
@@ -95,8 +95,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -139,24 +139,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c
index 02ed0f8..50d00a3 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c
@@ -116,8 +116,8 @@
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -129,24 +129,24 @@
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc01x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c
index 6044996..a22b257 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c
@@ -100,8 +100,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -120,27 +120,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 87f0291..8208fa0 100644
--- a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -195,13 +195,13 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -213,7 +213,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -225,8 +225,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
index 687a9b3..c5e0cdb 100644
--- a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -194,13 +194,13 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -212,7 +212,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -224,8 +224,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
index de05fc5..1df366c 100644
--- a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -202,13 +202,13 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -220,7 +220,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -232,8 +232,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 64eafde..9e6ecc3 100644
--- a/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -175,13 +175,13 @@
     int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -193,7 +193,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -204,8 +204,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 8531b93..0572c92 100644
--- a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -222,13 +222,13 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -240,7 +240,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -252,8 +252,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index de48d33..9741b73 100644
--- a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -148,13 +148,13 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -166,7 +166,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -178,8 +178,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-fp32-avx2.c b/src/qs8-gemm/gen/2x8c8-minmax-fp32-avx2.c
new file mode 100644
index 0000000..d63d4f9
--- /dev/null
+++ b/src/qs8-gemm/gen/2x8c8-minmax-fp32-avx2.c
@@ -0,0 +1,178 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-gemm/MRx8c8-avx2.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/gemm.h>
+#include <xnnpack/intrinsics-polyfill.h>
+#include <xnnpack/math.h>
+
+
+void xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2(
+    size_t mr,
+    size_t nc,
+    size_t kc,
+    const int8_t* restrict a,
+    size_t a_stride,
+    const void* restrict w,
+    int8_t* restrict c,
+    size_t cm_stride,
+    size_t cn_stride,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN XNN_DISABLE_MSAN
+{
+  assert(mr != 0);
+  assert(mr <= 2);
+  assert(nc != 0);
+  assert(kc != 0);
+  assert(kc % sizeof(int8_t) == 0);
+  assert(a != NULL);
+  assert(w != NULL);
+  assert(c != NULL);
+
+  kc = round_up_po2(kc, 8);
+  const int8_t* a0 = a;
+  int8_t* c0 = c;
+  const int8_t* a1 = (const int8_t*) ((uintptr_t) a0 + a_stride);
+  int8_t* c1 = (int8_t*) ((uintptr_t) c0 + cm_stride);
+  if XNN_UNPREDICTABLE(mr != 2) {
+    a1 = a0;
+    c1 = c0;
+  }
+
+  do {
+    const __m128i vbias0x0 = _mm_loadu_si32(w);
+    const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t)));
+    __m256i vacc0x01 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x0), vbias0x1, 1);
+    const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t)));
+    const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t)));
+    __m256i vacc0x23 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x2), vbias0x3, 1);
+    const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
+    const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t)));
+    __m256i vacc0x45 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x4), vbias0x5, 1);
+    const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t)));
+    const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t)));
+    __m256i vacc0x67 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x6), vbias0x7, 1);
+    __m256i vacc1x01 = vacc0x01;
+    __m256i vacc1x23 = vacc0x23;
+    __m256i vacc1x45 = vacc0x45;
+    __m256i vacc1x67 = vacc0x67;
+    w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+
+    size_t k = 0;
+    while (k < kc) {
+      const __m128i va0 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a0));
+      const __m256i vxa0 = _mm256_cvtepi8_epi16(va0);
+      a0 += 8;
+      const __m128i va1 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a1));
+      const __m256i vxa1 = _mm256_cvtepi8_epi16(va1);
+      a1 += 8;
+
+      const __m128i vb01 = _mm_load_si128((const __m128i*) w);
+      const __m256i vxb01 = _mm256_cvtepi8_epi16(vb01);
+
+      vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01));
+      vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01));
+      const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t)));
+      const __m256i vxb23 = _mm256_cvtepi8_epi16(vb23);
+
+      vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23));
+      vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
+      const __m128i vb45 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int8_t)));
+      const __m256i vxb45 = _mm256_cvtepi8_epi16(vb45);
+
+      vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45));
+      vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45));
+      const __m128i vb67 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 48 * sizeof(int8_t)));
+      const __m256i vxb67 = _mm256_cvtepi8_epi16(vb67);
+
+      vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67));
+      vacc1x67 = _mm256_add_epi32(vacc1x67, _mm256_madd_epi16(vxa1, vxb67));
+
+      w = (const void*) ((uintptr_t) w + 64 * sizeof(int8_t));
+      k += 8 * sizeof(int8_t);
+    }
+
+    const __m256i vacc0x0213 = _mm256_hadd_epi32(vacc0x01, vacc0x23);
+    const __m256i vacc0x4657 = _mm256_hadd_epi32(vacc0x45, vacc0x67);
+    const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
+    const __m256i vacc1x4657 = _mm256_hadd_epi32(vacc1x45, vacc1x67);
+
+    const __m256i vacc0x02461357 = _mm256_hadd_epi32(vacc0x0213, vacc0x4657);
+    const __m256i vacc1x02461357 = _mm256_hadd_epi32(vacc1x0213, vacc1x4657);
+
+    const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+    __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
+    __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
+
+    __m256 vscaled0x01234567 = _mm256_cvtepi32_ps(vacc0x01234567);
+    __m256 vscaled1x01234567 = _mm256_cvtepi32_ps(vacc1x01234567);
+
+    const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+    vscaled0x01234567 = _mm256_mul_ps(vscaled0x01234567, vscale);
+    vscaled1x01234567 = _mm256_mul_ps(vscaled1x01234567, vscale);
+
+    vacc0x01234567 = _mm256_cvtps_epi32(vscaled0x01234567);
+    vacc1x01234567 = _mm256_cvtps_epi32(vscaled1x01234567);
+
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+    __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
+
+    vacc01x01234567 = _mm256_permute4x64_epi64(vacc01x01234567, _MM_SHUFFLE(3, 1, 2, 0));
+
+    __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc01x01234567);
+
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_max));
+
+    __m128i vout_lo = _mm256_castsi256_si128(vout);
+    __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
+
+    if (nc >= 8) {
+      _mm_storel_epi64((__m128i*) c0, vout_lo);
+      _mm_storel_epi64((__m128i*) c1, vout_hi);
+
+      c0 = (int8_t*) ((uintptr_t) c0 + cn_stride);
+      c1 = (int8_t*) ((uintptr_t) c1 + cn_stride);
+
+      a0 = (const int8_t*) ((uintptr_t) a0 - kc);
+      a1 = (const int8_t*) ((uintptr_t) a1 - kc);
+
+      nc -= 8;
+    } else {
+      if (nc & 4) {
+        _mm_storeu_si32(c0, vout_lo);
+        _mm_storeu_si32(c1, vout_hi);
+
+        c0 += 4;
+        c1 += 4;
+
+        vout_lo = _mm_srli_epi64(vout_lo, 32);
+        vout_hi = _mm_srli_epi64(vout_hi, 32);
+      }
+      if (nc & 2) {
+        *((uint16_t*) c0) = (uint16_t) _mm_extract_epi16(vout_lo, 0);
+        *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0);
+
+        c0 += 2;
+        c1 += 2;
+
+        vout_lo = _mm_srli_epi32(vout_lo, 16);
+        vout_hi = _mm_srli_epi32(vout_hi, 16);
+      }
+      if (nc & 1) {
+        *c0 = (int8_t) _mm_extract_epi8(vout_lo, 0);
+        *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0);
+      }
+
+      nc = 0;
+    }
+  } while (nc != 0);
+}
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
index d388284..1e39921 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
@@ -112,8 +112,8 @@
     __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
     __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
     const __m256i vacc0x11335577 = _mm256_srli_epi64(vacc0x01234567, 32);
     const __m256i vacc1x11335577 = _mm256_srli_epi64(vacc1x01234567, 32);
@@ -132,28 +132,28 @@
     const __m256i vq31prod0x01234567 = _mm256_blend_epi16(vq31prod0x0246, vq31prod0x1357, 0xCC);
     const __m256i vq31prod1x01234567 = _mm256_blend_epi16(vq31prod1x0246, vq31prod1x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
     const __m256i vrem0x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod0x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod0x01234567));
     const __m256i vrem1x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod1x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod1x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
     vacc0x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, vremainder_threshold));
     vacc1x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod1x01234567, vshift), _mm256_cmpgt_epi32(vrem1x01234567, vremainder_threshold));
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
     __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
 
     vacc01x01234567 = _mm256_permute4x64_epi64(vacc01x01234567, _MM_SHUFFLE(3, 1, 2, 0));
 
     __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc01x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 923d485..6d45009 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -234,13 +234,13 @@
     int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -252,7 +252,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -263,8 +263,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
index e05b02b..612c0bc 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -159,13 +159,13 @@
     int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -177,7 +177,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -188,8 +188,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
index 6c39642..810bae4 100644
--- a/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
@@ -108,8 +108,8 @@
     __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
     __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
     const __m256i vacc0x11335577 = _mm256_srli_epi64(vacc0x01234567, 32);
     const __m256i vacc1x11335577 = _mm256_srli_epi64(vacc1x01234567, 32);
@@ -128,28 +128,28 @@
     const __m256i vq31prod0x01234567 = _mm256_blend_epi16(vq31prod0x0246, vq31prod0x1357, 0xCC);
     const __m256i vq31prod1x01234567 = _mm256_blend_epi16(vq31prod1x0246, vq31prod1x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
     const __m256i vrem0x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod0x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod0x01234567));
     const __m256i vrem1x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod1x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod1x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
     vacc0x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, vremainder_threshold));
     vacc1x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod1x01234567, vshift), _mm256_cmpgt_epi32(vrem1x01234567, vremainder_threshold));
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
     __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
 
     vacc01x01234567 = _mm256_permute4x64_epi64(vacc01x01234567, _MM_SHUFFLE(3, 1, 2, 0));
 
     __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc01x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 0c56b19..7729647 100644
--- a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -372,7 +372,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -386,7 +386,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -414,7 +414,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -438,8 +438,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
index 58deb96..b95411bf 100644
--- a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -370,7 +370,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -384,7 +384,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -412,7 +412,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -436,8 +436,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
index b17e441..55436f4 100644
--- a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -421,7 +421,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -435,7 +435,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -463,7 +463,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -487,8 +487,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
index e57ca14..3063ac0 100644
--- a/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -398,7 +398,7 @@
     int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -412,7 +412,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -440,7 +440,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -463,8 +463,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index ce9b048..dca07ae 100644
--- a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -478,7 +478,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -492,7 +492,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -520,7 +520,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -544,8 +544,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index b021ba0..01be9bb 100644
--- a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -290,7 +290,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -304,7 +304,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -332,7 +332,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -356,8 +356,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
index e94d74c..be2d1b5 100644
--- a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
@@ -55,14 +55,14 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
-  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
-  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_min));
-  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
+  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
+  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     __m512i vacc0x4567 = _mm512_maskz_expandloadu_epi32(vbias_mask, (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
diff --git a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
index 60be3af..b9aa38f 100644
--- a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -539,7 +539,7 @@
     int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -553,7 +553,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -581,7 +581,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -604,8 +604,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
index ca496b1..d328f83 100644
--- a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -350,7 +350,7 @@
     int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -364,7 +364,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -392,7 +392,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -415,8 +415,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c
index 10a266a..5b9009b 100644
--- a/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c
@@ -74,7 +74,7 @@
       k -= sizeof(int8_t);
     } while (k != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct1x0 = (int64_t) vacc1x0 * (int64_t) vmultiplier;
@@ -90,7 +90,7 @@
     const int32_t vq31product2x0 = (int32_t) (uint32_t) ((uint64_t) (vproduct2x0 + vq31rounding) >> 31);
     const int32_t vq31product2x1 = (int32_t) (uint32_t) ((uint64_t) (vproduct2x1 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder1x0 = (vq31product1x0 & vremainder_mask) - (int32_t) (vq31product1x0 < 0);
@@ -98,8 +98,8 @@
     const int32_t vremainder2x0 = (vq31product2x0 & vremainder_mask) - (int32_t) (vq31product2x0 < 0);
     const int32_t vremainder2x1 = (vq31product2x1 & vremainder_mask) - (int32_t) (vq31product2x1 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout1x0 = asr_s32(vq31product1x0, vshift) + (int32_t) (vremainder1x0 > vremainder_threshold);
@@ -107,7 +107,7 @@
     int32_t vout2x0 = asr_s32(vq31product2x0, vshift) + (int32_t) (vremainder2x0 > vremainder_threshold);
     int32_t vout2x1 = asr_s32(vq31product2x1, vshift) + (int32_t) (vremainder2x1 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout1x0 = math_max_s32(vout1x0, vout_min);
@@ -115,7 +115,7 @@
     vout2x0 = math_max_s32(vout2x0, vout_min);
     vout2x1 = math_max_s32(vout2x1, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout1x0 = math_min_s32(vout1x0, vout_max);
@@ -123,7 +123,7 @@
     vout2x0 = math_min_s32(vout2x0, vout_max);
     vout2x1 = math_min_s32(vout2x1, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout1x0 += voutput_zero_point;
diff --git a/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c
index 61c64a9..1ae2a6b 100644
--- a/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c
@@ -88,7 +88,7 @@
       k -= sizeof(int8_t);
     } while (k != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct0x2 = (int64_t) vacc0x2 * (int64_t) vmultiplier;
@@ -116,7 +116,7 @@
     const int32_t vq31product2x2 = (int32_t) (uint32_t) ((uint64_t) (vproduct2x2 + vq31rounding) >> 31);
     const int32_t vq31product2x3 = (int32_t) (uint32_t) ((uint64_t) (vproduct2x3 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder0x2 = (vq31product0x2 & vremainder_mask) - (int32_t) (vq31product0x2 < 0);
@@ -130,8 +130,8 @@
     const int32_t vremainder2x2 = (vq31product2x2 & vremainder_mask) - (int32_t) (vq31product2x2 < 0);
     const int32_t vremainder2x3 = (vq31product2x3 & vremainder_mask) - (int32_t) (vq31product2x3 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout0x2 = asr_s32(vq31product0x2, vshift) + (int32_t) (vremainder0x2 > vremainder_threshold);
@@ -145,7 +145,7 @@
     int32_t vout2x2 = asr_s32(vq31product2x2, vshift) + (int32_t) (vremainder2x2 > vremainder_threshold);
     int32_t vout2x3 = asr_s32(vq31product2x3, vshift) + (int32_t) (vremainder2x3 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout0x2 = math_max_s32(vout0x2, vout_min);
@@ -159,7 +159,7 @@
     vout2x2 = math_max_s32(vout2x2, vout_min);
     vout2x3 = math_max_s32(vout2x3, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout0x2 = math_min_s32(vout0x2, vout_max);
@@ -173,7 +173,7 @@
     vout2x2 = math_min_s32(vout2x2, vout_max);
     vout2x3 = math_min_s32(vout2x3, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout0x2 += voutput_zero_point;
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
index 885766e..acf1eb3 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
@@ -159,8 +159,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -185,7 +185,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -193,8 +193,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -202,15 +202,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
index 99e227f..1d059b7 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
@@ -159,8 +159,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -185,7 +185,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -193,8 +193,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -202,15 +202,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
index 56635b9..8cc7035 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -159,8 +159,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -217,7 +217,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -225,8 +225,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -234,12 +234,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
index 39e555d..a01a80a 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -159,8 +159,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -217,7 +217,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -225,8 +225,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -234,12 +234,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
index e106ae8..a6a16fa 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -159,8 +159,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -185,7 +185,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -193,8 +193,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -202,15 +202,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
index 18c00d6..c60f805 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -159,8 +159,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -185,7 +185,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -193,8 +193,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -202,15 +202,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
index c89d3bc..b693059 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -159,8 +159,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -217,7 +217,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -225,8 +225,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -234,12 +234,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
index d033069..7193059 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -159,8 +159,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -217,7 +217,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -225,8 +225,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -234,12 +234,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
index 49e37b1..525d542 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
@@ -164,8 +164,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -190,7 +190,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -198,8 +198,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -207,15 +207,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
index bb6ea3a..9fb43b5 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
@@ -164,8 +164,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -190,7 +190,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -198,8 +198,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -207,15 +207,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c
index aa07d11..b58ef12 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c
@@ -152,8 +152,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -178,7 +178,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -186,8 +186,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -195,15 +195,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c
index e1535ac..e43eb3e 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c
@@ -152,8 +152,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -210,7 +210,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -218,8 +218,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -227,12 +227,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c
index c481125..22dd63a 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c
@@ -152,8 +152,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -178,7 +178,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -186,8 +186,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -195,15 +195,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c
index 5f8686a..cb0a4f4 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -152,8 +152,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -210,7 +210,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -218,8 +218,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -227,12 +227,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c
index 609ec98..b0554bc 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c
@@ -157,8 +157,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -183,7 +183,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -191,8 +191,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -200,15 +200,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
index 42aeaaa..2d31d96 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
@@ -117,8 +117,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -143,7 +143,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -151,8 +151,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -160,15 +160,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
index 367ed89..bc3e045 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
@@ -119,8 +119,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -145,7 +145,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -153,8 +153,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -162,15 +162,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
index 156d22e..083b15f 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -117,8 +117,8 @@
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
     __m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -175,7 +175,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -183,8 +183,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -192,12 +192,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
index 5e401f5..f470c48 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -119,8 +119,8 @@
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
     __m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -177,7 +177,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -185,8 +185,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -194,12 +194,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
index d55b408..c8d66cb 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -117,8 +117,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -143,7 +143,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -151,8 +151,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -160,15 +160,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
index b7e914a..57b96a5 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -119,8 +119,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -145,7 +145,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -153,8 +153,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -162,15 +162,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
index fbd11c4..148842d 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -117,8 +117,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -175,7 +175,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -183,8 +183,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -192,12 +192,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
index e64c763..6eb9941 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -119,8 +119,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -177,7 +177,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -185,8 +185,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -194,12 +194,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 02387e4..a37bb42 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -149,8 +149,8 @@
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
     const v128_t vacc2x01 = wasm_v32x4_shuffle(vacc2x0123, vsign2x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -166,27 +166,27 @@
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
     const v128_t vq31prod2x0123 = wasm_v32x4_shuffle(vprod2x01, vprod2x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
     const v128_t vrem2x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod2x0123, vremainder_mask), wasm_i32x4_lt(vq31prod2x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
     vacc2x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod2x0123, vshift), wasm_i32x4_gt(vrem2x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
     v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 150bbfb..6ed27ee 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -145,8 +145,8 @@
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
     const v128_t vacc2x01 = wasm_v32x4_shuffle(vacc2x0123, vsign2x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -162,27 +162,27 @@
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
     const v128_t vq31prod2x0123 = wasm_v32x4_shuffle(vprod2x01, vprod2x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
     const v128_t vrem2x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod2x0123, vremainder_mask), wasm_i32x4_lt(vq31prod2x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
     vacc2x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod2x0123, vshift), wasm_i32x4_gt(vrem2x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
     v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
index 7bea2fd..c8245f1 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
@@ -122,8 +122,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -148,7 +148,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -156,8 +156,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -165,15 +165,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
index 5bafe10..560ba40 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
@@ -124,8 +124,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -150,7 +150,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -158,8 +158,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -167,15 +167,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c
index 544b4e2..3441a20 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c
@@ -115,8 +115,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -141,7 +141,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -149,8 +149,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -158,15 +158,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c
index d4fb08a..9178d3e 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c
@@ -115,8 +115,8 @@
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
     __m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -173,7 +173,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -181,8 +181,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -190,12 +190,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c
index b9df585..1e5ef35 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c
@@ -115,8 +115,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -141,7 +141,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -149,8 +149,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -158,15 +158,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c
index fd41846..65119a7 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c
@@ -115,8 +115,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -173,7 +173,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -181,8 +181,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -190,12 +190,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c
index 5b8c51e..adb57cf 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c
@@ -145,8 +145,8 @@
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
     const v128_t vacc2x01 = wasm_v32x4_shuffle(vacc2x0123, vsign2x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -162,27 +162,27 @@
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
     const v128_t vq31prod2x0123 = wasm_v32x4_shuffle(vprod2x01, vprod2x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
     const v128_t vrem2x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod2x0123, vremainder_mask), wasm_i32x4_lt(vq31prod2x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
     vacc2x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod2x0123, vshift), wasm_i32x4_gt(vrem2x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
     v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c
index 995ea8b..7a091ab 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c
@@ -120,8 +120,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -146,7 +146,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -154,8 +154,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -163,15 +163,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 6126c00..b8f255c 100644
--- a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -237,7 +237,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -245,7 +245,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -261,7 +261,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -277,8 +277,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
diff --git a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
index cd20044..9ec08f6 100644
--- a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -236,7 +236,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -244,7 +244,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -260,7 +260,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -276,8 +276,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
diff --git a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
index aa3397d..f6608d9 100644
--- a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -257,7 +257,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -265,7 +265,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -281,7 +281,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -297,8 +297,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
diff --git a/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 3dd59a6..6c73b05 100644
--- a/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -234,7 +234,7 @@
     int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -242,7 +242,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -258,7 +258,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -273,8 +273,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
diff --git a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index e3abc2a..ee3d8fa 100644
--- a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -286,7 +286,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -294,7 +294,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -310,7 +310,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -326,8 +326,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
diff --git a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 428330a..7a1ffad 100644
--- a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -186,7 +186,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -194,7 +194,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -210,7 +210,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -226,8 +226,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-fp32-avx2.c b/src/qs8-gemm/gen/3x8c8-minmax-fp32-avx2.c
new file mode 100644
index 0000000..60b38a7
--- /dev/null
+++ b/src/qs8-gemm/gen/3x8c8-minmax-fp32-avx2.c
@@ -0,0 +1,212 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-gemm/MRx8c8-avx2.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/gemm.h>
+#include <xnnpack/intrinsics-polyfill.h>
+#include <xnnpack/math.h>
+
+
+void xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2(
+    size_t mr,
+    size_t nc,
+    size_t kc,
+    const int8_t* restrict a,
+    size_t a_stride,
+    const void* restrict w,
+    int8_t* restrict c,
+    size_t cm_stride,
+    size_t cn_stride,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN XNN_DISABLE_MSAN
+{
+  assert(mr != 0);
+  assert(mr <= 3);
+  assert(nc != 0);
+  assert(kc != 0);
+  assert(kc % sizeof(int8_t) == 0);
+  assert(a != NULL);
+  assert(w != NULL);
+  assert(c != NULL);
+
+  kc = round_up_po2(kc, 8);
+  const int8_t* a0 = a;
+  int8_t* c0 = c;
+  const int8_t* a1 = (const int8_t*) ((uintptr_t) a0 + a_stride);
+  int8_t* c1 = (int8_t*) ((uintptr_t) c0 + cm_stride);
+  if XNN_UNPREDICTABLE(mr < 2) {
+    a1 = a0;
+    c1 = c0;
+  }
+  const int8_t* a2 = (const int8_t*) ((uintptr_t) a1 + a_stride);
+  int8_t* c2 = (int8_t*) ((uintptr_t) c1 + cm_stride);
+  if XNN_UNPREDICTABLE(mr <= 2) {
+    a2 = a1;
+    c2 = c1;
+  }
+
+  do {
+    const __m128i vbias0x0 = _mm_loadu_si32(w);
+    const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t)));
+    __m256i vacc0x01 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x0), vbias0x1, 1);
+    const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t)));
+    const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t)));
+    __m256i vacc0x23 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x2), vbias0x3, 1);
+    const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
+    const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t)));
+    __m256i vacc0x45 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x4), vbias0x5, 1);
+    const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t)));
+    const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t)));
+    __m256i vacc0x67 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x6), vbias0x7, 1);
+    __m256i vacc1x01 = vacc0x01;
+    __m256i vacc1x23 = vacc0x23;
+    __m256i vacc1x45 = vacc0x45;
+    __m256i vacc1x67 = vacc0x67;
+    __m256i vacc2x01 = vacc0x01;
+    __m256i vacc2x23 = vacc0x23;
+    __m256i vacc2x45 = vacc0x45;
+    __m256i vacc2x67 = vacc0x67;
+    w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+
+    size_t k = 0;
+    while (k < kc) {
+      const __m128i va0 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a0));
+      const __m256i vxa0 = _mm256_cvtepi8_epi16(va0);
+      a0 += 8;
+      const __m128i va1 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a1));
+      const __m256i vxa1 = _mm256_cvtepi8_epi16(va1);
+      a1 += 8;
+      const __m128i va2 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a2));
+      const __m256i vxa2 = _mm256_cvtepi8_epi16(va2);
+      a2 += 8;
+
+      const __m128i vb01 = _mm_load_si128((const __m128i*) w);
+      const __m256i vxb01 = _mm256_cvtepi8_epi16(vb01);
+
+      vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01));
+      vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01));
+      vacc2x01 = _mm256_add_epi32(vacc2x01, _mm256_madd_epi16(vxa2, vxb01));
+      const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t)));
+      const __m256i vxb23 = _mm256_cvtepi8_epi16(vb23);
+
+      vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23));
+      vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
+      vacc2x23 = _mm256_add_epi32(vacc2x23, _mm256_madd_epi16(vxa2, vxb23));
+      const __m128i vb45 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int8_t)));
+      const __m256i vxb45 = _mm256_cvtepi8_epi16(vb45);
+
+      vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45));
+      vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45));
+      vacc2x45 = _mm256_add_epi32(vacc2x45, _mm256_madd_epi16(vxa2, vxb45));
+      const __m128i vb67 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 48 * sizeof(int8_t)));
+      const __m256i vxb67 = _mm256_cvtepi8_epi16(vb67);
+
+      vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67));
+      vacc1x67 = _mm256_add_epi32(vacc1x67, _mm256_madd_epi16(vxa1, vxb67));
+      vacc2x67 = _mm256_add_epi32(vacc2x67, _mm256_madd_epi16(vxa2, vxb67));
+
+      w = (const void*) ((uintptr_t) w + 64 * sizeof(int8_t));
+      k += 8 * sizeof(int8_t);
+    }
+
+    const __m256i vacc0x0213 = _mm256_hadd_epi32(vacc0x01, vacc0x23);
+    const __m256i vacc0x4657 = _mm256_hadd_epi32(vacc0x45, vacc0x67);
+    const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
+    const __m256i vacc1x4657 = _mm256_hadd_epi32(vacc1x45, vacc1x67);
+    const __m256i vacc2x0213 = _mm256_hadd_epi32(vacc2x01, vacc2x23);
+    const __m256i vacc2x4657 = _mm256_hadd_epi32(vacc2x45, vacc2x67);
+
+    const __m256i vacc0x02461357 = _mm256_hadd_epi32(vacc0x0213, vacc0x4657);
+    const __m256i vacc1x02461357 = _mm256_hadd_epi32(vacc1x0213, vacc1x4657);
+    const __m256i vacc2x02461357 = _mm256_hadd_epi32(vacc2x0213, vacc2x4657);
+
+    const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+    __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
+    __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
+    __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask);
+
+    __m256 vscaled0x01234567 = _mm256_cvtepi32_ps(vacc0x01234567);
+    __m256 vscaled1x01234567 = _mm256_cvtepi32_ps(vacc1x01234567);
+    __m256 vscaled2x01234567 = _mm256_cvtepi32_ps(vacc2x01234567);
+
+    const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+    vscaled0x01234567 = _mm256_mul_ps(vscaled0x01234567, vscale);
+    vscaled1x01234567 = _mm256_mul_ps(vscaled1x01234567, vscale);
+    vscaled2x01234567 = _mm256_mul_ps(vscaled2x01234567, vscale);
+
+    vacc0x01234567 = _mm256_cvtps_epi32(vscaled0x01234567);
+    vacc1x01234567 = _mm256_cvtps_epi32(vscaled1x01234567);
+    vacc2x01234567 = _mm256_cvtps_epi32(vscaled2x01234567);
+
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+    __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
+    __m256i vacc22x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc2x01234567, vacc2x01234567), voutput_zero_point);
+
+    vacc01x01234567 = _mm256_permute4x64_epi64(vacc01x01234567, _MM_SHUFFLE(3, 1, 2, 0));
+    vacc22x01234567 = _mm256_permute4x64_epi64(vacc22x01234567, _MM_SHUFFLE(3, 1, 2, 0));
+
+    __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc22x01234567);
+
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_max));
+
+    __m128i vout_lo = _mm256_castsi256_si128(vout);
+    __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
+
+    if (nc >= 8) {
+      _mm_storel_epi64((__m128i*) c0, vout_lo);
+      _mm_storel_epi64((__m128i*) c1, vout_hi);
+      _mm_storeh_pi((__m64*) c2, _mm_castsi128_ps(vout_lo));
+
+      c0 = (int8_t*) ((uintptr_t) c0 + cn_stride);
+      c1 = (int8_t*) ((uintptr_t) c1 + cn_stride);
+      c2 = (int8_t*) ((uintptr_t) c2 + cn_stride);
+
+      a0 = (const int8_t*) ((uintptr_t) a0 - kc);
+      a1 = (const int8_t*) ((uintptr_t) a1 - kc);
+      a2 = (const int8_t*) ((uintptr_t) a2 - kc);
+
+      nc -= 8;
+    } else {
+      if (nc & 4) {
+        _mm_storeu_si32(c0, vout_lo);
+        _mm_storeu_si32(c1, vout_hi);
+        *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout_lo, 2);
+
+        c0 += 4;
+        c1 += 4;
+        c2 += 4;
+
+        vout_lo = _mm_srli_epi64(vout_lo, 32);
+        vout_hi = _mm_srli_epi64(vout_hi, 32);
+      }
+      if (nc & 2) {
+        *((uint16_t*) c0) = (uint16_t) _mm_extract_epi16(vout_lo, 0);
+        *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0);
+        *((uint16_t*) c2) = (uint16_t) _mm_extract_epi16(vout_lo, 4);
+
+        c0 += 2;
+        c1 += 2;
+        c2 += 2;
+
+        vout_lo = _mm_srli_epi32(vout_lo, 16);
+        vout_hi = _mm_srli_epi32(vout_hi, 16);
+      }
+      if (nc & 1) {
+        *c0 = (int8_t) _mm_extract_epi8(vout_lo, 0);
+        *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0);
+        *c2 = (uint8_t) _mm_extract_epi8(vout_lo, 8);
+      }
+
+      nc = 0;
+    }
+  } while (nc != 0);
+}
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
index 68c2e34..b87cc62 100644
--- a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
@@ -133,8 +133,8 @@
     __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
     __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
     const __m256i vacc0x11335577 = _mm256_srli_epi64(vacc0x01234567, 32);
     const __m256i vacc1x11335577 = _mm256_srli_epi64(vacc1x01234567, 32);
@@ -159,7 +159,7 @@
     const __m256i vq31prod1x01234567 = _mm256_blend_epi16(vq31prod1x0246, vq31prod1x1357, 0xCC);
     const __m256i vq31prod2x01234567 = _mm256_blend_epi16(vq31prod2x0246, vq31prod2x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
     const __m256i vrem0x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod0x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod0x01234567));
     const __m256i vrem1x01234567 =
@@ -167,8 +167,8 @@
     const __m256i vrem2x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod2x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod2x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
     vacc0x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, vremainder_threshold));
     vacc1x01234567 =
@@ -176,7 +176,7 @@
     vacc2x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod2x01234567, vshift), _mm256_cmpgt_epi32(vrem2x01234567, vremainder_threshold));
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
     __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
     __m256i vacc22x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc2x01234567, vacc2x01234567), voutput_zero_point);
 
@@ -185,8 +185,8 @@
 
     __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc22x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 564e036..85349cb 100644
--- a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -311,7 +311,7 @@
     int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -319,7 +319,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -335,7 +335,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -350,8 +350,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
index 38f9282..deb14a7 100644
--- a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -210,7 +210,7 @@
     int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -218,7 +218,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -234,7 +234,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -249,8 +249,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
diff --git a/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
index 6772f39..0a4326c 100644
--- a/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
@@ -129,8 +129,8 @@
     __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
     __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
     const __m256i vacc0x11335577 = _mm256_srli_epi64(vacc0x01234567, 32);
     const __m256i vacc1x11335577 = _mm256_srli_epi64(vacc1x01234567, 32);
@@ -155,7 +155,7 @@
     const __m256i vq31prod1x01234567 = _mm256_blend_epi16(vq31prod1x0246, vq31prod1x1357, 0xCC);
     const __m256i vq31prod2x01234567 = _mm256_blend_epi16(vq31prod2x0246, vq31prod2x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
     const __m256i vrem0x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod0x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod0x01234567));
     const __m256i vrem1x01234567 =
@@ -163,8 +163,8 @@
     const __m256i vrem2x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod2x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod2x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
     vacc0x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, vremainder_threshold));
     vacc1x01234567 =
@@ -172,7 +172,7 @@
     vacc2x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod2x01234567, vshift), _mm256_cmpgt_epi32(vrem2x01234567, vremainder_threshold));
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
     __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
     __m256i vacc22x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc2x01234567, vacc2x01234567), voutput_zero_point);
 
@@ -181,8 +181,8 @@
 
     __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc22x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 249c1a7..5ee36f5 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -446,7 +446,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -464,7 +464,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -500,7 +500,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -530,8 +530,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
index 90c23bc..0e78e76 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -444,7 +444,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -462,7 +462,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -498,7 +498,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -528,8 +528,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 000c5eb..a0bba2b 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -523,7 +523,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -541,7 +541,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -577,7 +577,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -607,8 +607,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 70da22e..1a7fde3 100644
--- a/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -509,7 +509,7 @@
     int32x4_t vacc3xCDEF = vcombine_s32(vsum3xCD, vsum3xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -527,7 +527,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -563,7 +563,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -592,8 +592,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 1f1884f..71a2a88 100644
--- a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -596,7 +596,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -614,7 +614,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -650,7 +650,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -680,8 +680,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 746bcb3..ed74770 100644
--- a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -358,7 +358,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -376,7 +376,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -412,7 +412,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -442,8 +442,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c
index c21610b..6b931a0 100644
--- a/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c
@@ -169,25 +169,25 @@
     }
 
     // Post-accumulation work
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
 
-    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->neon.multiplier);
-    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->neon.multiplier);
-    const int32x4_t vproduct0x89AB = vqrdmulhq_n_s32(vacc0x89AB, params->neon.multiplier);
-    const int32x4_t vproduct0xCDEF = vqrdmulhq_n_s32(vacc0xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->neon.multiplier);
-    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->neon.multiplier);
-    const int32x4_t vproduct1x89AB = vqrdmulhq_n_s32(vacc1x89AB, params->neon.multiplier);
-    const int32x4_t vproduct1xCDEF = vqrdmulhq_n_s32(vacc1xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->neon.multiplier);
-    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->neon.multiplier);
-    const int32x4_t vproduct2x89AB = vqrdmulhq_n_s32(vacc2x89AB, params->neon.multiplier);
-    const int32x4_t vproduct2xCDEF = vqrdmulhq_n_s32(vacc2xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->neon.multiplier);
-    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->neon.multiplier);
-    const int32x4_t vproduct3x89AB = vqrdmulhq_n_s32(vacc3x89AB, params->neon.multiplier);
-    const int32x4_t vproduct3xCDEF = vqrdmulhq_n_s32(vacc3xCDEF, params->neon.multiplier);
+    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x89AB = vqrdmulhq_n_s32(vacc0x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0xCDEF = vqrdmulhq_n_s32(vacc0xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x89AB = vqrdmulhq_n_s32(vacc1x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1xCDEF = vqrdmulhq_n_s32(vacc1xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x89AB = vqrdmulhq_n_s32(vacc2x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2xCDEF = vqrdmulhq_n_s32(vacc2xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x89AB = vqrdmulhq_n_s32(vacc3x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3xCDEF = vqrdmulhq_n_s32(vacc3xCDEF, params->gemmlowp_neon.multiplier);
 
     vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -223,7 +223,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -253,8 +253,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
index 5439a53..8ff8bbe 100644
--- a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
@@ -61,14 +61,14 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
-  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
-  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_min));
-  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
+  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
+  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     __m512i vacc0x4567 = _mm512_maskz_expandloadu_epi32(vbias_mask, (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
diff --git a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
index f03e0e8..7ce34e0 100644
--- a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -684,7 +684,7 @@
     int32x4_t vacc3xCDEF = vcombine_s32(vsum3xCD, vsum3xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -702,7 +702,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -738,7 +738,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -767,8 +767,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
index c5dea1e..2fb0196 100644
--- a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -445,7 +445,7 @@
     int32x4_t vacc3xCDEF = vcombine_s32(vsum3xCD, vsum3xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -463,7 +463,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -499,7 +499,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -528,8 +528,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c
index 9679d29..608db29 100644
--- a/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c
@@ -85,7 +85,7 @@
       k -= sizeof(int8_t);
     } while (k != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct1x0 = (int64_t) vacc1x0 * (int64_t) vmultiplier;
@@ -105,7 +105,7 @@
     const int32_t vq31product3x0 = (int32_t) (uint32_t) ((uint64_t) (vproduct3x0 + vq31rounding) >> 31);
     const int32_t vq31product3x1 = (int32_t) (uint32_t) ((uint64_t) (vproduct3x1 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder1x0 = (vq31product1x0 & vremainder_mask) - (int32_t) (vq31product1x0 < 0);
@@ -115,8 +115,8 @@
     const int32_t vremainder3x0 = (vq31product3x0 & vremainder_mask) - (int32_t) (vq31product3x0 < 0);
     const int32_t vremainder3x1 = (vq31product3x1 & vremainder_mask) - (int32_t) (vq31product3x1 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout1x0 = asr_s32(vq31product1x0, vshift) + (int32_t) (vremainder1x0 > vremainder_threshold);
@@ -126,7 +126,7 @@
     int32_t vout3x0 = asr_s32(vq31product3x0, vshift) + (int32_t) (vremainder3x0 > vremainder_threshold);
     int32_t vout3x1 = asr_s32(vq31product3x1, vshift) + (int32_t) (vremainder3x1 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout1x0 = math_max_s32(vout1x0, vout_min);
@@ -136,7 +136,7 @@
     vout3x0 = math_max_s32(vout3x0, vout_min);
     vout3x1 = math_max_s32(vout3x1, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout1x0 = math_min_s32(vout1x0, vout_max);
@@ -146,7 +146,7 @@
     vout3x0 = math_min_s32(vout3x0, vout_max);
     vout3x1 = math_min_s32(vout3x1, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout1x0 += voutput_zero_point;
diff --git a/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c
index 16419b8..74c469f 100644
--- a/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c
@@ -103,7 +103,7 @@
       k -= sizeof(int8_t);
     } while (k != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct0x2 = (int64_t) vacc0x2 * (int64_t) vmultiplier;
@@ -139,7 +139,7 @@
     const int32_t vq31product3x2 = (int32_t) (uint32_t) ((uint64_t) (vproduct3x2 + vq31rounding) >> 31);
     const int32_t vq31product3x3 = (int32_t) (uint32_t) ((uint64_t) (vproduct3x3 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder0x2 = (vq31product0x2 & vremainder_mask) - (int32_t) (vq31product0x2 < 0);
@@ -157,8 +157,8 @@
     const int32_t vremainder3x2 = (vq31product3x2 & vremainder_mask) - (int32_t) (vq31product3x2 < 0);
     const int32_t vremainder3x3 = (vq31product3x3 & vremainder_mask) - (int32_t) (vq31product3x3 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout0x2 = asr_s32(vq31product0x2, vshift) + (int32_t) (vremainder0x2 > vremainder_threshold);
@@ -176,7 +176,7 @@
     int32_t vout3x2 = asr_s32(vq31product3x2, vshift) + (int32_t) (vremainder3x2 > vremainder_threshold);
     int32_t vout3x3 = asr_s32(vq31product3x3, vshift) + (int32_t) (vremainder3x3 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout0x2 = math_max_s32(vout0x2, vout_min);
@@ -194,7 +194,7 @@
     vout3x2 = math_max_s32(vout3x2, vout_min);
     vout3x3 = math_max_s32(vout3x3, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout0x2 = math_min_s32(vout0x2, vout_max);
@@ -212,7 +212,7 @@
     vout3x2 = math_min_s32(vout3x2, vout_max);
     vout3x3 = math_min_s32(vout3x3, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout0x2 += voutput_zero_point;
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
index b0cec37..6ee3c62 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
@@ -186,8 +186,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -218,7 +218,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -228,8 +228,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -239,15 +239,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
index a3db995..939d732 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
@@ -186,8 +186,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -218,7 +218,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -228,8 +228,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -239,15 +239,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
index 17f7643..df5ede3 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -186,8 +186,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -258,7 +258,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -268,8 +268,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -279,12 +279,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
index d2f821c..9882976 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -186,8 +186,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -258,7 +258,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -268,8 +268,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -279,12 +279,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
index 2d53868..a7ea9d9 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -186,8 +186,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -218,7 +218,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -228,8 +228,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -239,15 +239,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
index dafa8dc..64fb13f 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -186,8 +186,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -218,7 +218,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -228,8 +228,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -239,15 +239,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
index 9c8cf70..739e36b 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -186,8 +186,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -258,7 +258,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -268,8 +268,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -279,12 +279,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
index 1225935..fb97abd 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -186,8 +186,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -258,7 +258,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -268,8 +268,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -279,12 +279,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
index 10bf278..4a2cf7b 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
@@ -191,8 +191,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -223,7 +223,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -233,8 +233,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -244,15 +244,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
index 4e2b48a..0af6be9 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
@@ -191,8 +191,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -223,7 +223,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -233,8 +233,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -244,15 +244,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c
index 06a057a..de0f304 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c
@@ -179,8 +179,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -211,7 +211,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -221,8 +221,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -232,15 +232,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c
index 127c98b..ed56800 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c
@@ -179,8 +179,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -251,7 +251,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -261,8 +261,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -272,12 +272,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c
index 93f66b1..29df1a0 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c
@@ -179,8 +179,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -211,7 +211,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -221,8 +221,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -232,15 +232,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c
index 357bec9..c52fd01 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -179,8 +179,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -251,7 +251,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -261,8 +261,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -272,12 +272,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c
index 61952bd..b4a5242 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c
@@ -184,8 +184,8 @@
       }
     }
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -216,7 +216,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -226,8 +226,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -237,15 +237,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 36b85b2..9b6ed23 100644
--- a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -279,7 +279,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -289,7 +289,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -309,7 +309,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -327,8 +327,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
index fa00c3a..de22292 100644
--- a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -278,7 +278,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -288,7 +288,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -308,7 +308,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -326,8 +326,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
index c53e78c..2c78c76 100644
--- a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -312,7 +312,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -322,7 +322,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -342,7 +342,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -360,8 +360,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 4351ac3..1ee332c 100644
--- a/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -293,7 +293,7 @@
     int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -303,7 +303,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -323,7 +323,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -340,8 +340,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index acf3d2e..42b9632 100644
--- a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -350,7 +350,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -360,7 +360,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -380,7 +380,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -398,8 +398,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index d640aef..c7acc44 100644
--- a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -224,7 +224,7 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -234,7 +234,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -254,7 +254,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -272,8 +272,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c
index 949f81b..266a756 100644
--- a/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c
@@ -131,17 +131,17 @@
     }
 
     // Post-accumulation work
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
 
-    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->neon.multiplier);
-    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->neon.multiplier);
-    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->neon.multiplier);
-    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->neon.multiplier);
-    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->neon.multiplier);
-    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->neon.multiplier);
-    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->neon.multiplier);
-    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->neon.multiplier);
+    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->gemmlowp_neon.multiplier);
 
     vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -161,7 +161,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -179,8 +179,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 5d4c337..b4ad38c 100644
--- a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -388,7 +388,7 @@
     int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -398,7 +398,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -418,7 +418,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -435,8 +435,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
index 81a0a81..7042d13 100644
--- a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -261,7 +261,7 @@
     int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -271,7 +271,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -291,7 +291,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -308,8 +308,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 3a9b99b..5fd4633 100644
--- a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -594,7 +594,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -620,7 +620,7 @@
     vacc5x89AB = vqrdmulhq_s32(vacc5x89AB, vmultiplier);
     vacc5xCDEF = vqrdmulhq_s32(vacc5xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -672,7 +672,7 @@
     vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_shift);
     vacc5xCDEF = vrshlq_s32(vacc5xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -714,8 +714,8 @@
     int8x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF));
     int8x16_t vout5x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc5x01234567), vqmovn_s16(vacc5x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
index b7bb61b..2812abd 100644
--- a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -592,7 +592,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -618,7 +618,7 @@
     vacc5x89AB = vqrdmulhq_s32(vacc5x89AB, vmultiplier);
     vacc5xCDEF = vqrdmulhq_s32(vacc5xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -670,7 +670,7 @@
     vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_shift);
     vacc5xCDEF = vrshlq_s32(vacc5xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -712,8 +712,8 @@
     int8x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF));
     int8x16_t vout5x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc5x01234567), vqmovn_s16(vacc5x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c
index 8eb91a7..abf5913 100644
--- a/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c
@@ -217,33 +217,33 @@
     }
 
     // Post-accumulation work
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
 
-    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->neon.multiplier);
-    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->neon.multiplier);
-    const int32x4_t vproduct0x89AB = vqrdmulhq_n_s32(vacc0x89AB, params->neon.multiplier);
-    const int32x4_t vproduct0xCDEF = vqrdmulhq_n_s32(vacc0xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->neon.multiplier);
-    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->neon.multiplier);
-    const int32x4_t vproduct1x89AB = vqrdmulhq_n_s32(vacc1x89AB, params->neon.multiplier);
-    const int32x4_t vproduct1xCDEF = vqrdmulhq_n_s32(vacc1xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->neon.multiplier);
-    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->neon.multiplier);
-    const int32x4_t vproduct2x89AB = vqrdmulhq_n_s32(vacc2x89AB, params->neon.multiplier);
-    const int32x4_t vproduct2xCDEF = vqrdmulhq_n_s32(vacc2xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->neon.multiplier);
-    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->neon.multiplier);
-    const int32x4_t vproduct3x89AB = vqrdmulhq_n_s32(vacc3x89AB, params->neon.multiplier);
-    const int32x4_t vproduct3xCDEF = vqrdmulhq_n_s32(vacc3xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct4x0123 = vqrdmulhq_n_s32(vacc4x0123, params->neon.multiplier);
-    const int32x4_t vproduct4x4567 = vqrdmulhq_n_s32(vacc4x4567, params->neon.multiplier);
-    const int32x4_t vproduct4x89AB = vqrdmulhq_n_s32(vacc4x89AB, params->neon.multiplier);
-    const int32x4_t vproduct4xCDEF = vqrdmulhq_n_s32(vacc4xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct5x0123 = vqrdmulhq_n_s32(vacc5x0123, params->neon.multiplier);
-    const int32x4_t vproduct5x4567 = vqrdmulhq_n_s32(vacc5x4567, params->neon.multiplier);
-    const int32x4_t vproduct5x89AB = vqrdmulhq_n_s32(vacc5x89AB, params->neon.multiplier);
-    const int32x4_t vproduct5xCDEF = vqrdmulhq_n_s32(vacc5xCDEF, params->neon.multiplier);
+    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x89AB = vqrdmulhq_n_s32(vacc0x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0xCDEF = vqrdmulhq_n_s32(vacc0xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x89AB = vqrdmulhq_n_s32(vacc1x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1xCDEF = vqrdmulhq_n_s32(vacc1xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x89AB = vqrdmulhq_n_s32(vacc2x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2xCDEF = vqrdmulhq_n_s32(vacc2xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x89AB = vqrdmulhq_n_s32(vacc3x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3xCDEF = vqrdmulhq_n_s32(vacc3xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x0123 = vqrdmulhq_n_s32(vacc4x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x4567 = vqrdmulhq_n_s32(vacc4x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x89AB = vqrdmulhq_n_s32(vacc4x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4xCDEF = vqrdmulhq_n_s32(vacc4xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x0123 = vqrdmulhq_n_s32(vacc5x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x4567 = vqrdmulhq_n_s32(vacc5x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x89AB = vqrdmulhq_n_s32(vacc5x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5xCDEF = vqrdmulhq_n_s32(vacc5xCDEF, params->gemmlowp_neon.multiplier);
 
     vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -295,7 +295,7 @@
     vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_shift);
     vacc5xCDEF = vrshlq_s32(vacc5xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -337,8 +337,8 @@
     int8x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF));
     int8x16_t vout5x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc5x01234567), vqmovn_s16(vacc5x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 2bb0e53..b08dfd9 100644
--- a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -363,7 +363,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -377,7 +377,7 @@
     vacc5x0123 = vqrdmulhq_s32(vacc5x0123, vmultiplier);
     vacc5x4567 = vqrdmulhq_s32(vacc5x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -405,7 +405,7 @@
     vacc5x0123 = vrshlq_s32(vacc5x0123, vright_shift);
     vacc5x4567 = vrshlq_s32(vacc5x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -429,8 +429,8 @@
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
     int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
index d36d452..2336529 100644
--- a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -362,7 +362,7 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -376,7 +376,7 @@
     vacc5x0123 = vqrdmulhq_s32(vacc5x0123, vmultiplier);
     vacc5x4567 = vqrdmulhq_s32(vacc5x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -404,7 +404,7 @@
     vacc5x0123 = vrshlq_s32(vacc5x0123, vright_shift);
     vacc5x4567 = vrshlq_s32(vacc5x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -428,8 +428,8 @@
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
     int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c
index 230bbff..6a54838 100644
--- a/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c
@@ -163,21 +163,21 @@
     }
 
     // Post-accumulation work
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
 
-    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->neon.multiplier);
-    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->neon.multiplier);
-    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->neon.multiplier);
-    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->neon.multiplier);
-    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->neon.multiplier);
-    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->neon.multiplier);
-    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->neon.multiplier);
-    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->neon.multiplier);
-    const int32x4_t vproduct4x0123 = vqrdmulhq_n_s32(vacc4x0123, params->neon.multiplier);
-    const int32x4_t vproduct4x4567 = vqrdmulhq_n_s32(vacc4x4567, params->neon.multiplier);
-    const int32x4_t vproduct5x0123 = vqrdmulhq_n_s32(vacc5x0123, params->neon.multiplier);
-    const int32x4_t vproduct5x4567 = vqrdmulhq_n_s32(vacc5x4567, params->neon.multiplier);
+    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x0123 = vqrdmulhq_n_s32(vacc4x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x4567 = vqrdmulhq_n_s32(vacc4x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x0123 = vqrdmulhq_n_s32(vacc5x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x4567 = vqrdmulhq_n_s32(vacc5x4567, params->gemmlowp_neon.multiplier);
 
     vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -205,7 +205,7 @@
     vacc5x0123 = vrshlq_s32(vacc5x0123, vright_shift);
     vacc5x4567 = vrshlq_s32(vacc5x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -229,8 +229,8 @@
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
     int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c
index da350ec..78f241d 100644
--- a/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c
@@ -265,41 +265,41 @@
     }
 
     // Post-accumulation work
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
 
-    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->neon.multiplier);
-    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->neon.multiplier);
-    const int32x4_t vproduct0x89AB = vqrdmulhq_n_s32(vacc0x89AB, params->neon.multiplier);
-    const int32x4_t vproduct0xCDEF = vqrdmulhq_n_s32(vacc0xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->neon.multiplier);
-    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->neon.multiplier);
-    const int32x4_t vproduct1x89AB = vqrdmulhq_n_s32(vacc1x89AB, params->neon.multiplier);
-    const int32x4_t vproduct1xCDEF = vqrdmulhq_n_s32(vacc1xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->neon.multiplier);
-    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->neon.multiplier);
-    const int32x4_t vproduct2x89AB = vqrdmulhq_n_s32(vacc2x89AB, params->neon.multiplier);
-    const int32x4_t vproduct2xCDEF = vqrdmulhq_n_s32(vacc2xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->neon.multiplier);
-    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->neon.multiplier);
-    const int32x4_t vproduct3x89AB = vqrdmulhq_n_s32(vacc3x89AB, params->neon.multiplier);
-    const int32x4_t vproduct3xCDEF = vqrdmulhq_n_s32(vacc3xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct4x0123 = vqrdmulhq_n_s32(vacc4x0123, params->neon.multiplier);
-    const int32x4_t vproduct4x4567 = vqrdmulhq_n_s32(vacc4x4567, params->neon.multiplier);
-    const int32x4_t vproduct4x89AB = vqrdmulhq_n_s32(vacc4x89AB, params->neon.multiplier);
-    const int32x4_t vproduct4xCDEF = vqrdmulhq_n_s32(vacc4xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct5x0123 = vqrdmulhq_n_s32(vacc5x0123, params->neon.multiplier);
-    const int32x4_t vproduct5x4567 = vqrdmulhq_n_s32(vacc5x4567, params->neon.multiplier);
-    const int32x4_t vproduct5x89AB = vqrdmulhq_n_s32(vacc5x89AB, params->neon.multiplier);
-    const int32x4_t vproduct5xCDEF = vqrdmulhq_n_s32(vacc5xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct6x0123 = vqrdmulhq_n_s32(vacc6x0123, params->neon.multiplier);
-    const int32x4_t vproduct6x4567 = vqrdmulhq_n_s32(vacc6x4567, params->neon.multiplier);
-    const int32x4_t vproduct6x89AB = vqrdmulhq_n_s32(vacc6x89AB, params->neon.multiplier);
-    const int32x4_t vproduct6xCDEF = vqrdmulhq_n_s32(vacc6xCDEF, params->neon.multiplier);
-    const int32x4_t vproduct7x0123 = vqrdmulhq_n_s32(vacc7x0123, params->neon.multiplier);
-    const int32x4_t vproduct7x4567 = vqrdmulhq_n_s32(vacc7x4567, params->neon.multiplier);
-    const int32x4_t vproduct7x89AB = vqrdmulhq_n_s32(vacc7x89AB, params->neon.multiplier);
-    const int32x4_t vproduct7xCDEF = vqrdmulhq_n_s32(vacc7xCDEF, params->neon.multiplier);
+    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x89AB = vqrdmulhq_n_s32(vacc0x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0xCDEF = vqrdmulhq_n_s32(vacc0xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x89AB = vqrdmulhq_n_s32(vacc1x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1xCDEF = vqrdmulhq_n_s32(vacc1xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x89AB = vqrdmulhq_n_s32(vacc2x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2xCDEF = vqrdmulhq_n_s32(vacc2xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x89AB = vqrdmulhq_n_s32(vacc3x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3xCDEF = vqrdmulhq_n_s32(vacc3xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x0123 = vqrdmulhq_n_s32(vacc4x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x4567 = vqrdmulhq_n_s32(vacc4x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x89AB = vqrdmulhq_n_s32(vacc4x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4xCDEF = vqrdmulhq_n_s32(vacc4xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x0123 = vqrdmulhq_n_s32(vacc5x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x4567 = vqrdmulhq_n_s32(vacc5x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x89AB = vqrdmulhq_n_s32(vacc5x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5xCDEF = vqrdmulhq_n_s32(vacc5xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct6x0123 = vqrdmulhq_n_s32(vacc6x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct6x4567 = vqrdmulhq_n_s32(vacc6x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct6x89AB = vqrdmulhq_n_s32(vacc6x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct6xCDEF = vqrdmulhq_n_s32(vacc6xCDEF, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct7x0123 = vqrdmulhq_n_s32(vacc7x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct7x4567 = vqrdmulhq_n_s32(vacc7x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct7x89AB = vqrdmulhq_n_s32(vacc7x89AB, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct7xCDEF = vqrdmulhq_n_s32(vacc7xCDEF, params->gemmlowp_neon.multiplier);
 
     vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -367,7 +367,7 @@
     vacc7x89AB = vrshlq_s32(vacc7x89AB, vright_shift);
     vacc7xCDEF = vrshlq_s32(vacc7xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -421,8 +421,8 @@
     int8x16_t vout6x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc6x89ABCDEF));
     int8x16_t vout7x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc7x01234567), vqmovn_s16(vacc7x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c
index 1c9501d..a01922e 100644
--- a/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c
@@ -195,25 +195,25 @@
     }
 
     // Post-accumulation work
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
 
-    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->neon.multiplier);
-    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->neon.multiplier);
-    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->neon.multiplier);
-    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->neon.multiplier);
-    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->neon.multiplier);
-    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->neon.multiplier);
-    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->neon.multiplier);
-    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->neon.multiplier);
-    const int32x4_t vproduct4x0123 = vqrdmulhq_n_s32(vacc4x0123, params->neon.multiplier);
-    const int32x4_t vproduct4x4567 = vqrdmulhq_n_s32(vacc4x4567, params->neon.multiplier);
-    const int32x4_t vproduct5x0123 = vqrdmulhq_n_s32(vacc5x0123, params->neon.multiplier);
-    const int32x4_t vproduct5x4567 = vqrdmulhq_n_s32(vacc5x4567, params->neon.multiplier);
-    const int32x4_t vproduct6x0123 = vqrdmulhq_n_s32(vacc6x0123, params->neon.multiplier);
-    const int32x4_t vproduct6x4567 = vqrdmulhq_n_s32(vacc6x4567, params->neon.multiplier);
-    const int32x4_t vproduct7x0123 = vqrdmulhq_n_s32(vacc7x0123, params->neon.multiplier);
-    const int32x4_t vproduct7x4567 = vqrdmulhq_n_s32(vacc7x4567, params->neon.multiplier);
+    const int32x4_t vproduct0x0123 = vqrdmulhq_n_s32(vacc0x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct0x4567 = vqrdmulhq_n_s32(vacc0x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x0123 = vqrdmulhq_n_s32(vacc1x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct1x4567 = vqrdmulhq_n_s32(vacc1x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x0123 = vqrdmulhq_n_s32(vacc2x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct2x4567 = vqrdmulhq_n_s32(vacc2x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x0123 = vqrdmulhq_n_s32(vacc3x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct3x4567 = vqrdmulhq_n_s32(vacc3x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x0123 = vqrdmulhq_n_s32(vacc4x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct4x4567 = vqrdmulhq_n_s32(vacc4x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x0123 = vqrdmulhq_n_s32(vacc5x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct5x4567 = vqrdmulhq_n_s32(vacc5x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct6x0123 = vqrdmulhq_n_s32(vacc6x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct6x4567 = vqrdmulhq_n_s32(vacc6x4567, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct7x0123 = vqrdmulhq_n_s32(vacc7x0123, params->gemmlowp_neon.multiplier);
+    const int32x4_t vproduct7x4567 = vqrdmulhq_n_s32(vacc7x4567, params->gemmlowp_neon.multiplier);
 
     vacc0x0123 = vsraq_n_s32(vproduct0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vproduct0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -249,7 +249,7 @@
     vacc7x0123 = vrshlq_s32(vacc7x0123, vright_shift);
     vacc7x4567 = vrshlq_s32(vacc7x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -279,8 +279,8 @@
     int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x01234567));
     int8x16_t vout6x01234567_7x01234567 = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc7x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-gemm/neon-mlal-lane.c.in b/src/qs8-gemm/neon-mlal-lane.c.in
index 6db2cae..e1a2829 100644
--- a/src/qs8-gemm/neon-mlal-lane.c.in
+++ b/src/qs8-gemm/neon-mlal-lane.c.in
@@ -174,12 +174,12 @@
       }
     }
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -189,7 +189,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -218,11 +218,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in range(MR):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-gemm/neon-mull-addw-dup.c.in b/src/qs8-gemm/neon-mull-addw-dup.c.in
index 42e01dd..8cbf8aa 100644
--- a/src/qs8-gemm/neon-mull-addw-dup.c.in
+++ b/src/qs8-gemm/neon-mull-addw-dup.c.in
@@ -158,12 +158,12 @@
         }
       }
     }
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -173,7 +173,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -202,11 +202,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in range(MR):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-gemm/scalar.c.in b/src/qs8-gemm/scalar.c.in
index 4b813a2..4d8f133 100644
--- a/src/qs8-gemm/scalar.c.in
+++ b/src/qs8-gemm/scalar.c.in
@@ -71,7 +71,7 @@
       k -= sizeof(int8_t);
     } while (k != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     $for M in range(MR):
       $for N in range(NR):
         const int64_t vproduct${M}x${N} = (int64_t) vacc${M}x${N} * (int64_t) vmultiplier;
@@ -81,28 +81,28 @@
       $for N in range(NR):
         const int32_t vq31product${M}x${N} = (int32_t) (uint32_t) ((uint64_t) (vproduct${M}x${N} + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     $for M in range(MR):
       $for N in range(NR):
         const int32_t vremainder${M}x${N} = (vq31product${M}x${N} & vremainder_mask) - (int32_t) (vq31product${M}x${N} < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     $for M in range(MR):
       $for N in range(NR):
         int32_t vout${M}x${N} = asr_s32(vq31product${M}x${N}, vshift) + (int32_t) (vremainder${M}x${N} > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     $for M in range(MR):
       $for N in range(NR):
         vout${M}x${N} = math_max_s32(vout${M}x${N}, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     $for M in range(MR):
       $for N in range(NR):
         vout${M}x${N} = math_min_s32(vout${M}x${N}, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     $for M in range(MR):
       $for N in range(NR):
         vout${M}x${N} += voutput_zero_point;
diff --git a/src/qs8-igemm/MRx16c8-avx512skx.c.in b/src/qs8-igemm/MRx16c8-avx512skx.c.in
index 5896443..1ac67b4 100644
--- a/src/qs8-igemm/MRx16c8-avx512skx.c.in
+++ b/src/qs8-igemm/MRx16c8-avx512skx.c.in
@@ -58,24 +58,24 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
   $if MR > 1:
-    const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
+    const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
   $else:
-    const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
+    const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
   $if MR > 2:
-    const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_min));
-    const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_max));
+    const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   $elif MR == 2:
-    const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_min));
-    const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_max));
+    const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   $else:
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     $for N in range(4, 16, 4):
diff --git a/src/qs8-igemm/MRx4c2-sse.c.in b/src/qs8-igemm/MRx4c2-sse.c.in
index 1d58a24..d08425d 100644
--- a/src/qs8-igemm/MRx4c2-sse.c.in
+++ b/src/qs8-igemm/MRx4c2-sse.c.in
@@ -25,7 +25,7 @@
 #include <xnnpack/math.h>
 
 
-$PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
+$PARAMS_STRUCT = "gemmlowp_sse4" if SSE >= 4 else "gemmlowp_sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
 void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x4c2__${ISA}_${VARIANT.lower()}(
     size_t mr,
@@ -275,8 +275,8 @@
       __m128i vacc${M}${min(M+1, MR-1)}x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc${M}x0123, vacc${min(M+1, MR-1)}x0123), voutput_zero_point);
 
     $if SSE < 4:
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       $for M in range(0, MR, 2):
         vacc${M}${min(M+1, MR-1)}x0123 = _mm_min_epi16(_mm_max_epi16(vacc${M}${min(M+1, MR-1)}x0123, voutput_min), voutput_max);
 
@@ -286,8 +286,8 @@
       __m128i vout = _mm_packs_epi16(vacc0${min(1, MR-1)}x0123, vacc0${min(1, MR-1)}x0123);
 
     $if SSE == 4:
-      vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-      vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+      vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+      vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       $for M in reversed(range(1, MR)):
diff --git a/src/qs8-igemm/MRx4c8-sse.c.in b/src/qs8-igemm/MRx4c8-sse.c.in
index ef7ac64..b616122 100644
--- a/src/qs8-igemm/MRx4c8-sse.c.in
+++ b/src/qs8-igemm/MRx4c8-sse.c.in
@@ -25,7 +25,7 @@
 #include <xnnpack/math.h>
 
 
-$PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
+$PARAMS_STRUCT = "gemmlowp_sse4" if SSE >= 4 else "gemmlowp_sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
 void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x4c8__${ISA}_${VARIANT.lower()}(
     size_t mr,
@@ -233,8 +233,8 @@
       __m128i vacc${M}${min(M+1, MR-1)}x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc${M}x0123, vacc${min(M+1, MR-1)}x0123), voutput_zero_point);
 
     $if SSE < 4:
-      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+      const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+      const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
       $for M in range(0, MR, 2):
         vacc${M}${min(M+1, MR-1)}x0123 = _mm_min_epi16(_mm_max_epi16(vacc${M}${min(M+1, MR-1)}x0123, voutput_min), voutput_max);
 
@@ -244,8 +244,8 @@
       __m128i vout = _mm_packs_epi16(vacc0${min(1, MR-1)}x0123, vacc0${min(1, MR-1)}x0123);
 
     $if SSE == 4:
-      vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-      vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+      vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+      vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       $for M in reversed(range(1, MR)):
diff --git a/src/qs8-igemm/MRx4c8-wasmsimd.c.in b/src/qs8-igemm/MRx4c8-wasmsimd.c.in
index c55267f..014dadd 100644
--- a/src/qs8-igemm/MRx4c8-wasmsimd.c.in
+++ b/src/qs8-igemm/MRx4c8-wasmsimd.c.in
@@ -141,8 +141,8 @@
     $for M in range(MR):
       const v128_t vacc${M}x01 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     $for M in range(MR):
       const v128_t vprod${M}x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc${M}x01, vmultiplier), vrounding);
       const v128_t vacc${M}x23 = wasm_v32x4_shuffle(vacc${M}x0123, vsign${M}x0123, 2, 6, 3, 7);
@@ -153,16 +153,16 @@
     $for M in range(MR):
       const v128_t vq31prod${M}x0123 = wasm_v32x4_shuffle(vprod${M}x01, vprod${M}x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     $for M in range(MR):
       const v128_t vrem${M}x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod${M}x0123, vremainder_mask), wasm_i32x4_lt(vq31prod${M}x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     $for M in range(MR):
       vacc${M}x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod${M}x0123, vshift), wasm_i32x4_gt(vrem${M}x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     $for M in range(0, MR, 2):
       v128_t vacc${M}${min(M+1, MR-1)}x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc${M}x0123, vacc${min(M+1, MR-1)}x0123), voutput_zero_point);
 
@@ -171,10 +171,10 @@
     $else:
       v128_t vout = wasm_i8x16_narrow_i16x8(vacc0${min(1, MR-1)}x0123, vacc0${min(1, MR-1)}x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-igemm/MRx8c8-avx2.c.in b/src/qs8-igemm/MRx8c8-avx2.c.in
index a78ef8a..f18999e 100644
--- a/src/qs8-igemm/MRx8c8-avx2.c.in
+++ b/src/qs8-igemm/MRx8c8-avx2.c.in
@@ -3,6 +3,7 @@
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 
+$assert REQUANTIZATION in ["GEMMLOWP", "FP32"]
 $assert MR <= 4
 #include <assert.h>
 
@@ -13,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_igemm_minmax_gemmlowp_ukernel_${MR}x8c8__avx2(
+void xnn_qs8_igemm_minmax_${REQUANTIZATION.lower()}_ukernel_${MR}x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
@@ -111,40 +112,51 @@
     $for M in range(MR):
       __m256i vacc${M}x01234567 = _mm256_permutevar8x32_epi32(vacc${M}x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    $if REQUANTIZATION == "GEMMLOWP":
+      const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+      const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
-    $for M in range(MR):
-      const __m256i vacc${M}x11335577 = _mm256_shuffle_epi32(vacc${M}x01234567, _MM_SHUFFLE(3, 3, 1, 1));
+      $for M in range(MR):
+        const __m256i vacc${M}x11335577 = _mm256_shuffle_epi32(vacc${M}x01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
-    $for M in range(MR):
-      const __m256i vprod${M}x0246 = _mm256_add_epi64(_mm256_mul_epi32(vacc${M}x01234567, vmultiplier), vrounding);
+      $for M in range(MR):
+        const __m256i vprod${M}x0246 = _mm256_add_epi64(_mm256_mul_epi32(vacc${M}x01234567, vmultiplier), vrounding);
 
-    $for M in range(MR):
-      const __m256i vprod${M}x1357 = _mm256_add_epi64(_mm256_mul_epi32(vacc${M}x11335577, vmultiplier), vrounding);
+      $for M in range(MR):
+        const __m256i vprod${M}x1357 = _mm256_add_epi64(_mm256_mul_epi32(vacc${M}x11335577, vmultiplier), vrounding);
 
-    $for M in range(MR):
-      const __m256i vq31prod${M}x0246 = _mm256_srli_epi64(vprod${M}x0246, 31);
-      const __m256i vq31prod${M}x1357 = _mm256_add_epi64(vprod${M}x1357, vprod${M}x1357);
+      $for M in range(MR):
+        const __m256i vq31prod${M}x0246 = _mm256_srli_epi64(vprod${M}x0246, 31);
+        const __m256i vq31prod${M}x1357 = _mm256_add_epi64(vprod${M}x1357, vprod${M}x1357);
 
-    $for M in range(MR):
-      const __m256i vq31prod${M}x01234567 = _mm256_blend_epi16(vq31prod${M}x0246, vq31prod${M}x1357, 0xCC);
+      $for M in range(MR):
+        const __m256i vq31prod${M}x01234567 = _mm256_blend_epi16(vq31prod${M}x0246, vq31prod${M}x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
-    $for M in range(MR):
-      const __m256i vrem${M}x01234567 =
-        _mm256_add_epi32(_mm256_and_si256(vq31prod${M}x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${M}x01234567));
+      const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
+      $for M in range(MR):
+        const __m256i vrem${M}x01234567 =
+          _mm256_add_epi32(_mm256_and_si256(vq31prod${M}x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod${M}x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    $if M > 1:
-      const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+      const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+      $if M > 1:
+        const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
+      $else:
+        const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
+      $for M in range(MR):
+        vacc${M}x01234567 =
+          _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${M}x01234567, vshift), _mm256_cmpgt_epi32(vrem${M}x01234567, vremainder_threshold));
     $else:
-      const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
-    $for M in range(MR):
-      vacc${M}x01234567 =
-        _mm256_sub_epi32(_mm256_sra_epi32(vq31prod${M}x01234567, vshift), _mm256_cmpgt_epi32(vrem${M}x01234567, vremainder_threshold));
+      $for M in range(MR):
+        __m256 vscaled${M}x01234567 = _mm256_cvtepi32_ps(vacc${M}x01234567);
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+      const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+      $for M in range(MR):
+        vscaled${M}x01234567 = _mm256_mul_ps(vscaled${M}x01234567, vscale);
+
+      $for M in range(MR):
+        vacc${M}x01234567 = _mm256_cvtps_epi32(vscaled${M}x01234567);
+
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->${REQUANTIZATION.lower()}_avx2.output_zero_point);
     $for M in range(0, MR, 2):
       __m256i vacc${M}${min(M+1, MR-1)}x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc${M}x01234567, vacc${min(M+1, MR-1)}x01234567), voutput_zero_point);
 
@@ -156,8 +168,8 @@
     $else:
       __m256i vout = _mm256_packs_epi16(vacc0${min(1, MR-1)}x01234567, vacc0${min(1, MR-1)}x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->${REQUANTIZATION.lower()}_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->${REQUANTIZATION.lower()}_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-igemm/MRxNRc4-neondot.c.in b/src/qs8-igemm/MRxNRc4-neondot.c.in
index 01b243d..bce09a9 100644
--- a/src/qs8-igemm/MRxNRc4-neondot.c.in
+++ b/src/qs8-igemm/MRxNRc4-neondot.c.in
@@ -111,12 +111,12 @@
       p -= ${MR} * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -126,7 +126,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -155,11 +155,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in reversed(range(MR)):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-igemm/c16-neon-mlal-padal.c.in b/src/qs8-igemm/c16-neon-mlal-padal.c.in
index 272cb0a..12a9e46 100644
--- a/src/qs8-igemm/c16-neon-mlal-padal.c.in
+++ b/src/qs8-igemm/c16-neon-mlal-padal.c.in
@@ -115,12 +115,12 @@
         int32x4_t vacc${M}x${ABC[N:N+4]} = vcombine_s32(vsum${M}x${ABC[N:N+2]}, vsum${M}x${ABC[N+2:N+4]} );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -130,7 +130,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -158,11 +158,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in reversed(range(MR)):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-igemm/c2-neon-mull-padal-dup.c.in b/src/qs8-igemm/c2-neon-mull-padal-dup.c.in
index b142393..5d854f2 100644
--- a/src/qs8-igemm/c2-neon-mull-padal-dup.c.in
+++ b/src/qs8-igemm/c2-neon-mull-padal-dup.c.in
@@ -150,12 +150,12 @@
       p -= ${MR} * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -165,7 +165,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -194,11 +194,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in reversed(range(MR)):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-igemm/c8-neon-mull-padal.c.in b/src/qs8-igemm/c8-neon-mull-padal.c.in
index 5ba099b..3f0aecc 100644
--- a/src/qs8-igemm/c8-neon-mull-padal.c.in
+++ b/src/qs8-igemm/c8-neon-mull-padal.c.in
@@ -133,12 +133,12 @@
         int32x4_t vacc${M}x${ABC[N:N+4]} = vcombine_s32(vsum${M}x${ABC[N:N+2]}, vsum${M}x${ABC[N+2:N+4]} );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -148,7 +148,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -176,11 +176,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in reversed(range(MR)):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 6926c15..937b755 100644
--- a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -237,13 +237,13 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -255,7 +255,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -267,8 +267,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
index 8c3108f..822e036 100644
--- a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -235,13 +235,13 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -253,7 +253,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -265,8 +265,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
index b9f356c..862974e 100644
--- a/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -231,13 +231,13 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -249,7 +249,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -261,8 +261,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 5d8934d..9137445 100644
--- a/src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -190,13 +190,13 @@
     int32x4_t vacc0xCDEF = vcombine_s32(vsum0xCD, vsum0xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -208,7 +208,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -219,8 +219,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index ff1e54a..cd8e764 100644
--- a/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -256,13 +256,13 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -274,7 +274,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -286,8 +286,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index bea4a9c..2e154a7 100644
--- a/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -168,13 +168,13 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -186,7 +186,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -198,8 +198,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x16c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/1x16c4-minmax-gemmlowp-neondot.c
index ed5980e..33f23aa 100644
--- a/src/qs8-igemm/gen/1x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/1x16c4-minmax-gemmlowp-neondot.c
@@ -106,13 +106,13 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -124,7 +124,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -136,8 +136,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
index cb1e65e..1e17928 100644
--- a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
@@ -44,14 +44,14 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
-  const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
-  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse4.output_min);
-  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse4.output_max);
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
+  const __m256i voutput_zero_point = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
+  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min);
+  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max);
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     __m512i vacc0x4567 = _mm512_maskz_expandloadu_epi32(vbias_mask, (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
diff --git a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
index 902ca23..c785638 100644
--- a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -263,13 +263,13 @@
     int32x4_t vacc0xCDEF = vcombine_s32(vsum0xCD, vsum0xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -281,7 +281,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -292,8 +292,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
index 447f2f9..1495452 100644
--- a/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -174,13 +174,13 @@
     int32x4_t vacc0xCDEF = vcombine_s32(vsum0xCD, vsum0xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
     vacc0xCDEF = vqrdmulhq_s32(vacc0xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -192,7 +192,7 @@
     vacc0x89AB = vrshlq_s32(vacc0x89AB, vright_shift);
     vacc0xCDEF = vrshlq_s32(vacc0xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -203,8 +203,8 @@
 
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x2-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/1x2-minmax-gemmlowp-scalar.c
index 7e61341..64456f3 100644
--- a/src/qs8-igemm/gen/1x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/1x2-minmax-gemmlowp-scalar.c
@@ -69,7 +69,7 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
 
@@ -77,24 +77,24 @@
     const int32_t vq31product0x0 = (int32_t) (uint32_t) ((uint64_t) (vproduct0x0 + vq31rounding) >> 31);
     const int32_t vq31product0x1 = (int32_t) (uint32_t) ((uint64_t) (vproduct0x1 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
 
diff --git a/src/qs8-igemm/gen/1x4-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/1x4-minmax-gemmlowp-scalar.c
index f6e58e2..b8fd52a 100644
--- a/src/qs8-igemm/gen/1x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/1x4-minmax-gemmlowp-scalar.c
@@ -75,7 +75,7 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct0x2 = (int64_t) vacc0x2 * (int64_t) vmultiplier;
@@ -87,32 +87,32 @@
     const int32_t vq31product0x2 = (int32_t) (uint32_t) ((uint64_t) (vproduct0x2 + vq31rounding) >> 31);
     const int32_t vq31product0x3 = (int32_t) (uint32_t) ((uint64_t) (vproduct0x3 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder0x2 = (vq31product0x2 & vremainder_mask) - (int32_t) (vq31product0x2 < 0);
     const int32_t vremainder0x3 = (vq31product0x3 & vremainder_mask) - (int32_t) (vq31product0x3 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout0x2 = asr_s32(vq31product0x2, vshift) + (int32_t) (vremainder0x2 > vremainder_threshold);
     int32_t vout0x3 = asr_s32(vq31product0x3, vshift) + (int32_t) (vremainder0x3 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout0x2 = math_max_s32(vout0x2, vout_min);
     vout0x3 = math_max_s32(vout0x3, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout0x2 = math_min_s32(vout0x2, vout_max);
     vout0x3 = math_min_s32(vout0x3, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout0x2 += voutput_zero_point;
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
index 6e59c58..661d8ba 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
@@ -118,8 +118,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -132,23 +132,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
index ac9acf0..7d25a0b 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
@@ -118,8 +118,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -132,23 +132,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
index 36debb2..c0dcabd 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -118,8 +118,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -148,20 +148,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
index ebc1887..05f8476 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -118,8 +118,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -148,20 +148,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
index 7e79ae7..4ea067c 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -118,8 +118,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -132,23 +132,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
index 60cbccf..27eebfa 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -118,8 +118,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -132,23 +132,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
index ee07d39..7cd1c20 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -118,8 +118,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -148,20 +148,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
index 0b148d1..4ac0e38 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -118,8 +118,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -148,20 +148,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
index f4a1138..93e6feb 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
@@ -123,8 +123,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -137,23 +137,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
index 3a55a85..86570d8 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
@@ -123,8 +123,8 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -137,23 +137,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
index 80623bc..9bad1ab 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
@@ -90,8 +90,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -104,23 +104,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
index fed83b2..5c58a64 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
@@ -92,8 +92,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -106,23 +106,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
index a9f0447..8429156 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -90,8 +90,8 @@
 
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -120,20 +120,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
index c82935c..b7a1222 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -92,8 +92,8 @@
 
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -122,20 +122,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
index 7620e4e..be97585 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -90,8 +90,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -104,23 +104,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
index 8c2619a..f5d3dcf 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -92,8 +92,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -106,23 +106,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
index 26c6287..8e085ec 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -90,8 +90,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -120,20 +120,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
index 1e1ea15..83d2a34 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -92,8 +92,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
 
@@ -122,20 +122,20 @@
 
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc00x0123 = _mm_min_epi16(_mm_max_epi16(vacc00x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index cf0aec9..c98411d 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -104,8 +104,8 @@
 
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
 
@@ -113,22 +113,22 @@
 
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc00x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc0x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc00x0123, vacc00x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 71c1d3e..ec51265 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -100,8 +100,8 @@
 
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
 
@@ -109,22 +109,22 @@
 
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc00x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc0x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc00x0123, vacc00x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
index 8f9f58e..d9d0c90 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
@@ -95,8 +95,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -109,23 +109,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
index 061e4fb..11011b1 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
@@ -97,8 +97,8 @@
 
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -111,23 +111,23 @@
 
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc00x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc0x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc00x0123, vacc00x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c0) = (uint32_t) _mm_cvtsi128_si32(vout);
diff --git a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index ffb8d1f..ded8e17 100644
--- a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -166,11 +166,11 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -178,7 +178,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -188,8 +188,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
index 3eb5dc4..f6571b9 100644
--- a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -165,11 +165,11 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -177,7 +177,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -187,8 +187,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
index 8f1b8ad..12af71f 100644
--- a/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -161,11 +161,11 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -173,7 +173,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -183,8 +183,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
index f15bf0b..3508bcf 100644
--- a/src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -130,11 +130,11 @@
     int32x4_t vacc0x4567 = vcombine_s32(vsum0x45, vsum0x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -142,7 +142,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
@@ -151,8 +151,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index a1735fe..81574a2 100644
--- a/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -172,11 +172,11 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -184,7 +184,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -194,8 +194,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index f90333e..d678e34 100644
--- a/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -124,11 +124,11 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -136,7 +136,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -146,8 +146,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x8c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/1x8c4-minmax-gemmlowp-neondot.c
index 6178992..81bf586 100644
--- a/src/qs8-igemm/gen/1x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/1x8c4-minmax-gemmlowp-neondot.c
@@ -92,11 +92,11 @@
       p -= 1 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -104,7 +104,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
 
@@ -114,8 +114,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-fp32-avx2.c b/src/qs8-igemm/gen/1x8c8-minmax-fp32-avx2.c
new file mode 100644
index 0000000..dc3a7a3
--- /dev/null
+++ b/src/qs8-igemm/gen/1x8c8-minmax-fp32-avx2.c
@@ -0,0 +1,159 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-igemm/MRx8c8-avx2.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/igemm.h>
+#include <xnnpack/intrinsics-polyfill.h>
+#include <xnnpack/math.h>
+
+
+void xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2(
+    size_t mr,
+    size_t nc,
+    size_t kc,
+    size_t ks,
+    const int8_t** restrict a,
+    const void* restrict w,
+    int8_t* restrict c,
+    size_t cm_stride,
+    size_t cn_stride,
+    size_t a_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN XNN_DISABLE_MSAN
+{
+  assert(mr != 0);
+  assert(mr <= 1);
+  assert(nc != 0);
+  assert(kc != 0);
+  assert(ks != 0);
+  assert(ks % (1 * sizeof(void*)) == 0);
+  assert(a_offset % sizeof(int8_t) == 0);
+  assert(a != NULL);
+  assert(w != NULL);
+  assert(c != NULL);
+
+  kc = round_up_po2(kc, 8);
+  int8_t* c0 = c;
+
+  do {
+    const __m128i vbias0x0 = _mm_loadu_si32(w);
+    const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t)));
+    __m256i vacc0x01 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x0), vbias0x1, 1);
+    const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t)));
+    const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t)));
+    __m256i vacc0x23 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x2), vbias0x3, 1);
+    const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
+    const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t)));
+    __m256i vacc0x45 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x4), vbias0x5, 1);
+    const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t)));
+    const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t)));
+    __m256i vacc0x67 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x6), vbias0x7, 1);
+    w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+
+    size_t p = ks;
+    do {
+      const int8_t* restrict a0 = a[0];
+      if XNN_UNPREDICTABLE(a0 != zero) {
+        a0 = (const int8_t*) ((uintptr_t) a0 + a_offset);
+      }
+      a += 1;
+
+      size_t k = 0;
+      while (k < kc) {
+        const __m128i va0 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a0));
+        const __m256i vxa0 = _mm256_cvtepi8_epi16(va0);
+        a0 += 8;
+
+        const __m128i vb01 = _mm_load_si128((const __m128i*) w);
+        const __m256i vxb01 = _mm256_cvtepi8_epi16(vb01);
+
+        vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01));
+        const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t)));
+        const __m256i vxb23 = _mm256_cvtepi8_epi16(vb23);
+
+        vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23));
+        const __m128i vb45 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int8_t)));
+        const __m256i vxb45 = _mm256_cvtepi8_epi16(vb45);
+
+        vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45));
+        const __m128i vb67 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 48 * sizeof(int8_t)));
+        const __m256i vxb67 = _mm256_cvtepi8_epi16(vb67);
+
+        vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67));
+
+        w = (const void*) ((uintptr_t) w + 64 * sizeof(int8_t));
+        k += 8 * sizeof(int8_t);
+      }
+      p -= 1 * sizeof(void*);
+    } while (p != 0);
+
+    const __m256i vacc0x0213 = _mm256_hadd_epi32(vacc0x01, vacc0x23);
+    const __m256i vacc0x4657 = _mm256_hadd_epi32(vacc0x45, vacc0x67);
+
+    const __m256i vacc0x02461357 = _mm256_hadd_epi32(vacc0x0213, vacc0x4657);
+
+    const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+    __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
+
+    __m256 vscaled0x01234567 = _mm256_cvtepi32_ps(vacc0x01234567);
+
+    const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+    vscaled0x01234567 = _mm256_mul_ps(vscaled0x01234567, vscale);
+
+    vacc0x01234567 = _mm256_cvtps_epi32(vscaled0x01234567);
+
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+    __m256i vacc00x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc0x01234567), voutput_zero_point);
+
+    vacc00x01234567 = _mm256_permute4x64_epi64(vacc00x01234567, _MM_SHUFFLE(3, 1, 2, 0));
+
+    __m256i vout = _mm256_packs_epi16(vacc00x01234567, vacc00x01234567);
+
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_max));
+
+    __m128i vout_lo = _mm256_castsi256_si128(vout);
+    __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
+
+    if (nc >= 8) {
+      _mm_storel_epi64((__m128i*) c0, vout_lo);
+
+      c0 = (int8_t*) ((uintptr_t) c0 + cn_stride);
+
+      a = (const int8_t**restrict) ((uintptr_t) a - ks);
+
+      nc -= 8;
+    } else {
+      if (nc & 4) {
+        _mm_storeu_si32(c0, vout_lo);
+
+        c0 += 4;
+
+        vout_lo = _mm_srli_epi64(vout_lo, 32);
+        vout_hi = _mm_srli_epi64(vout_hi, 32);
+      }
+      if (nc & 2) {
+        *((uint16_t*) c0) = (uint16_t) _mm_extract_epi16(vout_lo, 0);
+
+        c0 += 2;
+
+        vout_lo = _mm_srli_epi32(vout_lo, 16);
+        vout_hi = _mm_srli_epi32(vout_hi, 16);
+      }
+      if (nc & 1) {
+        *c0 = (int8_t) _mm_extract_epi8(vout_lo, 0);
+      }
+
+      nc = 0;
+    }
+  } while (nc != 0);
+}
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c
index 262d505..34aedf9 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-avx2.c
@@ -104,8 +104,8 @@
     const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
     __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
     const __m256i vacc0x11335577 = _mm256_shuffle_epi32(vacc0x01234567, _MM_SHUFFLE(3, 3, 1, 1));
 
@@ -118,24 +118,24 @@
 
     const __m256i vq31prod0x01234567 = _mm256_blend_epi16(vq31prod0x0246, vq31prod0x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
     const __m256i vrem0x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod0x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod0x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
     vacc0x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, vremainder_threshold));
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
     __m256i vacc00x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc0x01234567), voutput_zero_point);
 
     vacc00x01234567 = _mm256_permute4x64_epi64(vacc00x01234567, _MM_SHUFFLE(3, 1, 2, 0));
 
     __m256i vout = _mm256_packs_epi16(vacc00x01234567, vacc00x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 141c892..3bdf444 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -171,11 +171,11 @@
     int32x4_t vacc0x4567 = vcombine_s32(vsum0x45, vsum0x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -183,7 +183,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
@@ -192,8 +192,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
index 90b0238..34bf610 100644
--- a/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -122,11 +122,11 @@
     int32x4_t vacc0x4567 = vcombine_s32(vsum0x45, vsum0x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -134,7 +134,7 @@
     vacc0x0123 = vrshlq_s32(vacc0x0123, vright_shift);
     vacc0x4567 = vrshlq_s32(vacc0x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
@@ -143,8 +143,8 @@
 
     int8x8_t vout0x01234567 = vqmovn_s16(vacc0x01234567);
 #endif
-    const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-    const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+    const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567 = vmax_s8(vout0x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index c2ee2b1..c982ab3 100644
--- a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -313,7 +313,7 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -323,7 +323,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -343,7 +343,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -361,8 +361,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
index 103a96b..6531145 100644
--- a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -311,7 +311,7 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -321,7 +321,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -341,7 +341,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -359,8 +359,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 3b90389..ce22534 100644
--- a/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -335,7 +335,7 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -345,7 +345,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -365,7 +365,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -383,8 +383,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 25dede9..9fc9af7 100644
--- a/src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -303,7 +303,7 @@
     int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -313,7 +313,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -333,7 +333,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -350,8 +350,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 4a9edcb..07f01f8 100644
--- a/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -376,7 +376,7 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -386,7 +386,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -406,7 +406,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -424,8 +424,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index a742dc7..26f4388 100644
--- a/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -238,7 +238,7 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -248,7 +248,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -268,7 +268,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -286,8 +286,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
index 1469cbd..28f4d21 100644
--- a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
@@ -48,14 +48,14 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
-  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
-  const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_min));
-  const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse4.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
+  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
+  const __m256i voutput_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+  const __m256i voutput_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     __m512i vacc0x4567 = _mm512_maskz_expandloadu_epi32(vbias_mask, (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
diff --git a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
index 14c22de..677f8e1 100644
--- a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -410,7 +410,7 @@
     int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -420,7 +420,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -440,7 +440,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -457,8 +457,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
index eab9f86..6f18bb7 100644
--- a/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -271,7 +271,7 @@
     int32x4_t vacc1xCDEF = vcombine_s32(vsum1xCD, vsum1xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -281,7 +281,7 @@
     vacc1x89AB = vqrdmulhq_s32(vacc1x89AB, vmultiplier);
     vacc1xCDEF = vqrdmulhq_s32(vacc1xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -301,7 +301,7 @@
     vacc1x89AB = vrshlq_s32(vacc1x89AB, vright_shift);
     vacc1xCDEF = vrshlq_s32(vacc1xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -318,8 +318,8 @@
     int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCDEF));
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
     vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/2x2-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/2x2-minmax-gemmlowp-scalar.c
index 08105fb..c8aa053 100644
--- a/src/qs8-igemm/gen/2x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/2x2-minmax-gemmlowp-scalar.c
@@ -83,7 +83,7 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct1x0 = (int64_t) vacc1x0 * (int64_t) vmultiplier;
@@ -95,32 +95,32 @@
     const int32_t vq31product1x0 = (int32_t) (uint32_t) ((uint64_t) (vproduct1x0 + vq31rounding) >> 31);
     const int32_t vq31product1x1 = (int32_t) (uint32_t) ((uint64_t) (vproduct1x1 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder1x0 = (vq31product1x0 & vremainder_mask) - (int32_t) (vq31product1x0 < 0);
     const int32_t vremainder1x1 = (vq31product1x1 & vremainder_mask) - (int32_t) (vq31product1x1 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout1x0 = asr_s32(vq31product1x0, vshift) + (int32_t) (vremainder1x0 > vremainder_threshold);
     int32_t vout1x1 = asr_s32(vq31product1x1, vshift) + (int32_t) (vremainder1x1 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout1x0 = math_max_s32(vout1x0, vout_min);
     vout1x1 = math_max_s32(vout1x1, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout1x0 = math_min_s32(vout1x0, vout_max);
     vout1x1 = math_min_s32(vout1x1, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout1x0 += voutput_zero_point;
diff --git a/src/qs8-igemm/gen/2x4-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/2x4-minmax-gemmlowp-scalar.c
index 983266a..05fb1ca 100644
--- a/src/qs8-igemm/gen/2x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/2x4-minmax-gemmlowp-scalar.c
@@ -93,7 +93,7 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct0x2 = (int64_t) vacc0x2 * (int64_t) vmultiplier;
@@ -113,7 +113,7 @@
     const int32_t vq31product1x2 = (int32_t) (uint32_t) ((uint64_t) (vproduct1x2 + vq31rounding) >> 31);
     const int32_t vq31product1x3 = (int32_t) (uint32_t) ((uint64_t) (vproduct1x3 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder0x2 = (vq31product0x2 & vremainder_mask) - (int32_t) (vq31product0x2 < 0);
@@ -123,8 +123,8 @@
     const int32_t vremainder1x2 = (vq31product1x2 & vremainder_mask) - (int32_t) (vq31product1x2 < 0);
     const int32_t vremainder1x3 = (vq31product1x3 & vremainder_mask) - (int32_t) (vq31product1x3 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout0x2 = asr_s32(vq31product0x2, vshift) + (int32_t) (vremainder0x2 > vremainder_threshold);
@@ -134,7 +134,7 @@
     int32_t vout1x2 = asr_s32(vq31product1x2, vshift) + (int32_t) (vremainder1x2 > vremainder_threshold);
     int32_t vout1x3 = asr_s32(vq31product1x3, vshift) + (int32_t) (vremainder1x3 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout0x2 = math_max_s32(vout0x2, vout_min);
@@ -144,7 +144,7 @@
     vout1x2 = math_max_s32(vout1x2, vout_min);
     vout1x3 = math_max_s32(vout1x3, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout0x2 = math_min_s32(vout0x2, vout_max);
@@ -154,7 +154,7 @@
     vout1x2 = math_min_s32(vout1x2, vout_max);
     vout1x3 = math_min_s32(vout1x3, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout0x2 += voutput_zero_point;
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
index 3578a5b..33a075a 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
@@ -147,8 +147,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -167,27 +167,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
index 5257f6e..2edacd9 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
@@ -147,8 +147,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -167,27 +167,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
index 9e8349d..8dada86 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -147,8 +147,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -191,24 +191,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
index c2f43d9..402b6d4 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -147,8 +147,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -191,24 +191,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
index 0d80b2d..cce3b5f 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -147,8 +147,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -167,27 +167,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
index 2108f4e..9d7733e 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -147,8 +147,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -167,27 +167,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
index f3dd6b7..c91fcd9 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -147,8 +147,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -191,24 +191,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
index ba9d551..f790aef 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -147,8 +147,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -191,24 +191,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
index 428a117..51cc3cf 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
@@ -152,8 +152,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -172,27 +172,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
index 63d70b7..251dded 100644
--- a/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
@@ -152,8 +152,8 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -172,27 +172,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
index dfebf12..ceed36c 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
@@ -112,8 +112,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -132,27 +132,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
index e5a1494..ca30e42 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
@@ -114,8 +114,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -134,27 +134,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
index f0dcbad..bc54486 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -112,8 +112,8 @@
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -156,24 +156,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
index 1782540..c16c117 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -114,8 +114,8 @@
     __m128i vacc0x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc0x02, vacc0x13), _mm_unpackhi_epi32(vacc0x02, vacc0x13));
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -158,24 +158,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
index 2832c06..cd2d9f2 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -112,8 +112,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -132,27 +132,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
index e48e958..6eb3515 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -114,8 +114,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -134,27 +134,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
index 95b32f2..727aac5 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -112,8 +112,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -156,24 +156,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
index fd6c95d..5bfabbd 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -114,8 +114,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -158,24 +158,24 @@
     const __m128i vq31prod0x0123 = _mm_shuffle_epi32(vq31prod0x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 237b105..bc35e32 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -135,8 +135,8 @@
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -148,24 +148,24 @@
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc01x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 9a67158..5b62a36 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -131,8 +131,8 @@
     const v128_t vacc0x01 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 0, 4, 1, 5);
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -144,24 +144,24 @@
     const v128_t vq31prod0x0123 = wasm_v32x4_shuffle(vprod0x01, vprod0x23, 1, 3, 5, 7);
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc01x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
index 05804fa..d3a2605 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
@@ -117,8 +117,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -137,27 +137,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
index 58a7df2..717b1ae 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
@@ -119,8 +119,8 @@
     __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23);
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -139,27 +139,27 @@
     const __m128i vq31prod0x0123 = _mm_blend_epi16(vq31prod0x02, vq31prod0x13, 0xCC);
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod1x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod1x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod1x0123, vshift), _mm_cmpgt_epi32(vrem1x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc01x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c1) = (uint32_t) _mm_extract_epi32(vout, 1);
diff --git a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index be6906b..5ff93c2 100644
--- a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -210,13 +210,13 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -228,7 +228,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -240,8 +240,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
index 49ed995..6dd31d6 100644
--- a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -209,13 +209,13 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -227,7 +227,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -239,8 +239,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
index 9f6507a..e9e320b 100644
--- a/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -218,13 +218,13 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -236,7 +236,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -248,8 +248,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
index a41e6c8..2a692b4 100644
--- a/src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -191,13 +191,13 @@
     int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -209,7 +209,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -220,8 +220,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index ef3a726..8205735 100644
--- a/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -238,13 +238,13 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -256,7 +256,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -268,8 +268,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 8a38186..e499fe0 100644
--- a/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -164,13 +164,13 @@
       p -= 2 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -182,7 +182,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -194,8 +194,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-fp32-avx2.c b/src/qs8-igemm/gen/2x8c8-minmax-fp32-avx2.c
new file mode 100644
index 0000000..ab6220d
--- /dev/null
+++ b/src/qs8-igemm/gen/2x8c8-minmax-fp32-avx2.c
@@ -0,0 +1,192 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-igemm/MRx8c8-avx2.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/igemm.h>
+#include <xnnpack/intrinsics-polyfill.h>
+#include <xnnpack/math.h>
+
+
+void xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2(
+    size_t mr,
+    size_t nc,
+    size_t kc,
+    size_t ks,
+    const int8_t** restrict a,
+    const void* restrict w,
+    int8_t* restrict c,
+    size_t cm_stride,
+    size_t cn_stride,
+    size_t a_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN XNN_DISABLE_MSAN
+{
+  assert(mr != 0);
+  assert(mr <= 2);
+  assert(nc != 0);
+  assert(kc != 0);
+  assert(ks != 0);
+  assert(ks % (2 * sizeof(void*)) == 0);
+  assert(a_offset % sizeof(int8_t) == 0);
+  assert(a != NULL);
+  assert(w != NULL);
+  assert(c != NULL);
+
+  kc = round_up_po2(kc, 8);
+  int8_t* c0 = c;
+  int8_t* c1 = (int8_t*) ((uintptr_t) c0 + cm_stride);
+  if XNN_UNPREDICTABLE(mr != 2) {
+    c1 = c0;
+  }
+
+  do {
+    const __m128i vbias0x0 = _mm_loadu_si32(w);
+    const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t)));
+    __m256i vacc0x01 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x0), vbias0x1, 1);
+    const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t)));
+    const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t)));
+    __m256i vacc0x23 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x2), vbias0x3, 1);
+    const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
+    const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t)));
+    __m256i vacc0x45 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x4), vbias0x5, 1);
+    const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t)));
+    const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t)));
+    __m256i vacc0x67 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x6), vbias0x7, 1);
+    __m256i vacc1x01 = vacc0x01;
+    __m256i vacc1x23 = vacc0x23;
+    __m256i vacc1x45 = vacc0x45;
+    __m256i vacc1x67 = vacc0x67;
+    w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+
+    size_t p = ks;
+    do {
+      const int8_t* restrict a0 = a[0];
+      if XNN_UNPREDICTABLE(a0 != zero) {
+        a0 = (const int8_t*) ((uintptr_t) a0 + a_offset);
+      }
+      const int8_t* restrict a1 = a[1];
+      if XNN_UNPREDICTABLE(a1 != zero) {
+        a1 = (const int8_t*) ((uintptr_t) a1 + a_offset);
+      }
+      a += 2;
+
+      size_t k = 0;
+      while (k < kc) {
+        const __m128i va0 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a0));
+        const __m256i vxa0 = _mm256_cvtepi8_epi16(va0);
+        a0 += 8;
+        const __m128i va1 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a1));
+        const __m256i vxa1 = _mm256_cvtepi8_epi16(va1);
+        a1 += 8;
+
+        const __m128i vb01 = _mm_load_si128((const __m128i*) w);
+        const __m256i vxb01 = _mm256_cvtepi8_epi16(vb01);
+
+        vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01));
+        vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01));
+        const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t)));
+        const __m256i vxb23 = _mm256_cvtepi8_epi16(vb23);
+
+        vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23));
+        vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
+        const __m128i vb45 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int8_t)));
+        const __m256i vxb45 = _mm256_cvtepi8_epi16(vb45);
+
+        vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45));
+        vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45));
+        const __m128i vb67 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 48 * sizeof(int8_t)));
+        const __m256i vxb67 = _mm256_cvtepi8_epi16(vb67);
+
+        vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67));
+        vacc1x67 = _mm256_add_epi32(vacc1x67, _mm256_madd_epi16(vxa1, vxb67));
+
+        w = (const void*) ((uintptr_t) w + 64 * sizeof(int8_t));
+        k += 8 * sizeof(int8_t);
+      }
+      p -= 2 * sizeof(void*);
+    } while (p != 0);
+
+    const __m256i vacc0x0213 = _mm256_hadd_epi32(vacc0x01, vacc0x23);
+    const __m256i vacc0x4657 = _mm256_hadd_epi32(vacc0x45, vacc0x67);
+    const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
+    const __m256i vacc1x4657 = _mm256_hadd_epi32(vacc1x45, vacc1x67);
+
+    const __m256i vacc0x02461357 = _mm256_hadd_epi32(vacc0x0213, vacc0x4657);
+    const __m256i vacc1x02461357 = _mm256_hadd_epi32(vacc1x0213, vacc1x4657);
+
+    const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+    __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
+    __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
+
+    __m256 vscaled0x01234567 = _mm256_cvtepi32_ps(vacc0x01234567);
+    __m256 vscaled1x01234567 = _mm256_cvtepi32_ps(vacc1x01234567);
+
+    const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+    vscaled0x01234567 = _mm256_mul_ps(vscaled0x01234567, vscale);
+    vscaled1x01234567 = _mm256_mul_ps(vscaled1x01234567, vscale);
+
+    vacc0x01234567 = _mm256_cvtps_epi32(vscaled0x01234567);
+    vacc1x01234567 = _mm256_cvtps_epi32(vscaled1x01234567);
+
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+    __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
+
+    vacc01x01234567 = _mm256_permute4x64_epi64(vacc01x01234567, _MM_SHUFFLE(3, 1, 2, 0));
+
+    __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc01x01234567);
+
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_max));
+
+    __m128i vout_lo = _mm256_castsi256_si128(vout);
+    __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
+
+    if (nc >= 8) {
+      _mm_storel_epi64((__m128i*) c1, vout_hi);
+      _mm_storel_epi64((__m128i*) c0, vout_lo);
+
+      c1 = (int8_t*) ((uintptr_t) c1 + cn_stride);
+      c0 = (int8_t*) ((uintptr_t) c0 + cn_stride);
+
+      a = (const int8_t**restrict) ((uintptr_t) a - ks);
+
+      nc -= 8;
+    } else {
+      if (nc & 4) {
+        _mm_storeu_si32(c1, vout_hi);
+        _mm_storeu_si32(c0, vout_lo);
+
+        c1 += 4;
+        c0 += 4;
+
+        vout_lo = _mm_srli_epi64(vout_lo, 32);
+        vout_hi = _mm_srli_epi64(vout_hi, 32);
+      }
+      if (nc & 2) {
+        *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0);
+        *((uint16_t*) c0) = (uint16_t) _mm_extract_epi16(vout_lo, 0);
+
+        c1 += 2;
+        c0 += 2;
+
+        vout_lo = _mm_srli_epi32(vout_lo, 16);
+        vout_hi = _mm_srli_epi32(vout_hi, 16);
+      }
+      if (nc & 1) {
+        *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0);
+        *c0 = (int8_t) _mm_extract_epi8(vout_lo, 0);
+      }
+
+      nc = 0;
+    }
+  } while (nc != 0);
+}
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c
index fa1ac3a..a5c416d 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-avx2.c
@@ -127,8 +127,8 @@
     __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
     __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
     const __m256i vacc0x11335577 = _mm256_shuffle_epi32(vacc0x01234567, _MM_SHUFFLE(3, 3, 1, 1));
     const __m256i vacc1x11335577 = _mm256_shuffle_epi32(vacc1x01234567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -147,28 +147,28 @@
     const __m256i vq31prod0x01234567 = _mm256_blend_epi16(vq31prod0x0246, vq31prod0x1357, 0xCC);
     const __m256i vq31prod1x01234567 = _mm256_blend_epi16(vq31prod1x0246, vq31prod1x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
     const __m256i vrem0x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod0x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod0x01234567));
     const __m256i vrem1x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod1x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod1x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    const __m128i vshift = _mm_load_si128((const __m128i*) params->avx2.shift);
+    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+    const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_avx2.shift);
     vacc0x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, vremainder_threshold));
     vacc1x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod1x01234567, vshift), _mm256_cmpgt_epi32(vrem1x01234567, vremainder_threshold));
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
     __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
 
     vacc01x01234567 = _mm256_permute4x64_epi64(vacc01x01234567, _MM_SHUFFLE(3, 1, 2, 0));
 
     __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc01x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 958284a..ec1d49a 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -250,13 +250,13 @@
     int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -268,7 +268,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -279,8 +279,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
index 4a15b90..bad3768 100644
--- a/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -175,13 +175,13 @@
     int32x4_t vacc1x4567 = vcombine_s32(vsum1x45, vsum1x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
     vacc1x4567 = vqrdmulhq_s32(vacc1x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -193,7 +193,7 @@
     vacc1x0123 = vrshlq_s32(vacc1x0123, vright_shift);
     vacc1x4567 = vrshlq_s32(vacc1x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -204,8 +204,8 @@
 
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
 
diff --git a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 3d06ac3..0a149f9 100644
--- a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -389,7 +389,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -403,7 +403,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -431,7 +431,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -455,8 +455,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
index d2eb117..c5fef29 100644
--- a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -387,7 +387,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -401,7 +401,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -429,7 +429,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -453,8 +453,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
index e42fdbd..1fa0999 100644
--- a/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -439,7 +439,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -453,7 +453,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -481,7 +481,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -505,8 +505,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
index baf2a7c..d427f34 100644
--- a/src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -416,7 +416,7 @@
     int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -430,7 +430,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -458,7 +458,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -481,8 +481,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 4171f7c..d105e81 100644
--- a/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -496,7 +496,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -510,7 +510,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -538,7 +538,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -562,8 +562,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 9a5f7c1..f1fb618 100644
--- a/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -308,7 +308,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -322,7 +322,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -350,7 +350,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -374,8 +374,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
index c9b90bb..db80256 100644
--- a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
@@ -52,14 +52,14 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
-  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
-  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_min));
-  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
+  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
+  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     __m512i vacc0x4567 = _mm512_maskz_expandloadu_epi32(vbias_mask, (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
diff --git a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
index b44bd79..0f99704 100644
--- a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -557,7 +557,7 @@
     int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -571,7 +571,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -599,7 +599,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -622,8 +622,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
index 4d9e5b4..d73f4b3 100644
--- a/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -368,7 +368,7 @@
     int32x4_t vacc2xCDEF = vcombine_s32(vsum2xCD, vsum2xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -382,7 +382,7 @@
     vacc2x89AB = vqrdmulhq_s32(vacc2x89AB, vmultiplier);
     vacc2xCDEF = vqrdmulhq_s32(vacc2xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -410,7 +410,7 @@
     vacc2x89AB = vrshlq_s32(vacc2x89AB, vright_shift);
     vacc2xCDEF = vrshlq_s32(vacc2xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -433,8 +433,8 @@
     int8x16_t vout1x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc1x01234567), vqmovn_s16(vacc1x89ABCDEF));
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
     vout1x0123456789ABCDEF = vmaxq_s8(vout1x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/3x2-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/3x2-minmax-gemmlowp-scalar.c
index 8589d8a..79b5895 100644
--- a/src/qs8-igemm/gen/3x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/3x2-minmax-gemmlowp-scalar.c
@@ -97,7 +97,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct1x0 = (int64_t) vacc1x0 * (int64_t) vmultiplier;
@@ -113,7 +113,7 @@
     const int32_t vq31product2x0 = (int32_t) (uint32_t) ((uint64_t) (vproduct2x0 + vq31rounding) >> 31);
     const int32_t vq31product2x1 = (int32_t) (uint32_t) ((uint64_t) (vproduct2x1 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder1x0 = (vq31product1x0 & vremainder_mask) - (int32_t) (vq31product1x0 < 0);
@@ -121,8 +121,8 @@
     const int32_t vremainder2x0 = (vq31product2x0 & vremainder_mask) - (int32_t) (vq31product2x0 < 0);
     const int32_t vremainder2x1 = (vq31product2x1 & vremainder_mask) - (int32_t) (vq31product2x1 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout1x0 = asr_s32(vq31product1x0, vshift) + (int32_t) (vremainder1x0 > vremainder_threshold);
@@ -130,7 +130,7 @@
     int32_t vout2x0 = asr_s32(vq31product2x0, vshift) + (int32_t) (vremainder2x0 > vremainder_threshold);
     int32_t vout2x1 = asr_s32(vq31product2x1, vshift) + (int32_t) (vremainder2x1 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout1x0 = math_max_s32(vout1x0, vout_min);
@@ -138,7 +138,7 @@
     vout2x0 = math_max_s32(vout2x0, vout_min);
     vout2x1 = math_max_s32(vout2x1, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout1x0 = math_min_s32(vout1x0, vout_max);
@@ -146,7 +146,7 @@
     vout2x0 = math_min_s32(vout2x0, vout_max);
     vout2x1 = math_min_s32(vout2x1, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout1x0 += voutput_zero_point;
diff --git a/src/qs8-igemm/gen/3x4-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/3x4-minmax-gemmlowp-scalar.c
index 1e0723f..b4b1ddc 100644
--- a/src/qs8-igemm/gen/3x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/3x4-minmax-gemmlowp-scalar.c
@@ -111,7 +111,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct0x2 = (int64_t) vacc0x2 * (int64_t) vmultiplier;
@@ -139,7 +139,7 @@
     const int32_t vq31product2x2 = (int32_t) (uint32_t) ((uint64_t) (vproduct2x2 + vq31rounding) >> 31);
     const int32_t vq31product2x3 = (int32_t) (uint32_t) ((uint64_t) (vproduct2x3 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder0x2 = (vq31product0x2 & vremainder_mask) - (int32_t) (vq31product0x2 < 0);
@@ -153,8 +153,8 @@
     const int32_t vremainder2x2 = (vq31product2x2 & vremainder_mask) - (int32_t) (vq31product2x2 < 0);
     const int32_t vremainder2x3 = (vq31product2x3 & vremainder_mask) - (int32_t) (vq31product2x3 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout0x2 = asr_s32(vq31product0x2, vshift) + (int32_t) (vremainder0x2 > vremainder_threshold);
@@ -168,7 +168,7 @@
     int32_t vout2x2 = asr_s32(vq31product2x2, vshift) + (int32_t) (vremainder2x2 > vremainder_threshold);
     int32_t vout2x3 = asr_s32(vq31product2x3, vshift) + (int32_t) (vremainder2x3 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout0x2 = math_max_s32(vout0x2, vout_min);
@@ -182,7 +182,7 @@
     vout2x2 = math_max_s32(vout2x2, vout_min);
     vout2x3 = math_max_s32(vout2x3, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout0x2 = math_min_s32(vout0x2, vout_max);
@@ -196,7 +196,7 @@
     vout2x2 = math_min_s32(vout2x2, vout_max);
     vout2x3 = math_min_s32(vout2x3, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout0x2 += voutput_zero_point;
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
index 7c0ac28..0c941bd 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
@@ -176,8 +176,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -202,7 +202,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -210,8 +210,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -219,15 +219,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
index ee31cdf..e98b019 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
@@ -176,8 +176,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -202,7 +202,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -210,8 +210,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -219,15 +219,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
index ce17450..51b1271 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -176,8 +176,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -234,7 +234,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -242,8 +242,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -251,12 +251,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
index 0bb45da..25b9a16 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -176,8 +176,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -234,7 +234,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -242,8 +242,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -251,12 +251,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
index 9d3d199..02850f4 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -176,8 +176,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -202,7 +202,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -210,8 +210,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -219,15 +219,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
index 71ad095..b886943 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -176,8 +176,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -202,7 +202,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -210,8 +210,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -219,15 +219,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
index 8422a5d..c9743bc 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -176,8 +176,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -234,7 +234,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -242,8 +242,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -251,12 +251,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
index 5e2ed5a..5b6f8e6 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -176,8 +176,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -234,7 +234,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -242,8 +242,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -251,12 +251,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
index fd55767..7df63c6 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
@@ -181,8 +181,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -207,7 +207,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -215,8 +215,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -224,15 +224,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
index 39e2d25..197a6af 100644
--- a/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
@@ -181,8 +181,8 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -207,7 +207,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -215,8 +215,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -224,15 +224,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
index 223cc60..b49c379 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
@@ -134,8 +134,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -160,7 +160,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -168,8 +168,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -177,15 +177,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
index a72c325..a7d9926 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
@@ -136,8 +136,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -162,7 +162,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -170,8 +170,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -179,15 +179,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
index 837a82a..a59b40a 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -134,8 +134,8 @@
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
     __m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -192,7 +192,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -200,8 +200,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -209,12 +209,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
index 6d67db7..ca33a86 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -136,8 +136,8 @@
     __m128i vacc1x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc1x02, vacc1x13), _mm_unpackhi_epi32(vacc1x02, vacc1x13));
     __m128i vacc2x0123 = _mm_add_epi32(_mm_unpacklo_epi32(vacc2x02, vacc2x13), _mm_unpackhi_epi32(vacc2x02, vacc2x13));
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -194,7 +194,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -202,8 +202,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -211,12 +211,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
index c0ca2dc..ca1f14f 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -134,8 +134,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -160,7 +160,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -168,8 +168,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -177,15 +177,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
index 2a8923f..d658603 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -136,8 +136,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -162,7 +162,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -170,8 +170,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -179,15 +179,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
index 722aaf7..1ec5500 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -134,8 +134,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -192,7 +192,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -200,8 +200,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -209,12 +209,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
index 9cdc776..ebf492b 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -136,8 +136,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -194,7 +194,7 @@
     const __m128i vq31prod1x0123 = _mm_shuffle_epi32(vq31prod1x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -202,8 +202,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -211,12 +211,12 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc22x0123 = _mm_min_epi16(_mm_max_epi16(vacc22x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 21a741d..1b89f99 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -166,8 +166,8 @@
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
     const v128_t vacc2x01 = wasm_v32x4_shuffle(vacc2x0123, vsign2x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -183,27 +183,27 @@
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
     const v128_t vq31prod2x0123 = wasm_v32x4_shuffle(vprod2x01, vprod2x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
     const v128_t vrem2x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod2x0123, vremainder_mask), wasm_i32x4_lt(vq31prod2x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
     vacc2x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod2x0123, vshift), wasm_i32x4_gt(vrem2x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
     v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index b5bddcb..cb3de49 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -162,8 +162,8 @@
     const v128_t vacc1x01 = wasm_v32x4_shuffle(vacc1x0123, vsign1x0123, 0, 4, 1, 5);
     const v128_t vacc2x01 = wasm_v32x4_shuffle(vacc2x0123, vsign2x0123, 0, 4, 1, 5);
 
-    const v128_t vmultiplier = wasm_v128_load(params->wasmsimd.multiplier);
-    const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);
+    const v128_t vmultiplier = wasm_v128_load(params->gemmlowp_wasmsimd.multiplier);
+    const v128_t vrounding = wasm_v128_load(params->gemmlowp_wasmsimd.rounding);
     const v128_t vprod0x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc0x01, vmultiplier), vrounding);
     const v128_t vacc0x23 = wasm_v32x4_shuffle(vacc0x0123, vsign0x0123, 2, 6, 3, 7);
     const v128_t vprod1x01 = wasm_i64x2_add(wasm_i64x2_mul(vacc1x01, vmultiplier), vrounding);
@@ -179,27 +179,27 @@
     const v128_t vq31prod1x0123 = wasm_v32x4_shuffle(vprod1x01, vprod1x23, 1, 3, 5, 7);
     const v128_t vq31prod2x0123 = wasm_v32x4_shuffle(vprod2x01, vprod2x23, 1, 3, 5, 7);
 
-    const v128_t vremainder_mask = wasm_v128_load(params->wasmsimd.remainder_mask);
+    const v128_t vremainder_mask = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_mask);
     const v128_t vrem0x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod0x0123, vremainder_mask), wasm_i32x4_lt(vq31prod0x0123, vzero));
     const v128_t vrem1x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod1x0123, vremainder_mask), wasm_i32x4_lt(vq31prod1x0123, vzero));
     const v128_t vrem2x0123 = wasm_i32x4_add(wasm_v128_and(vq31prod2x0123, vremainder_mask), wasm_i32x4_lt(vq31prod2x0123, vzero));
 
-    const v128_t vthreshold = wasm_v128_load(params->wasmsimd.remainder_threshold);
-    const int32_t vshift = params->wasmsimd.shift;
+    const v128_t vthreshold = wasm_v128_load(params->gemmlowp_wasmsimd.remainder_threshold);
+    const int32_t vshift = params->gemmlowp_wasmsimd.shift;
     vacc0x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod0x0123, vshift), wasm_i32x4_gt(vrem0x0123, vthreshold));
     vacc1x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod1x0123, vshift), wasm_i32x4_gt(vrem1x0123, vthreshold));
     vacc2x0123 = wasm_i32x4_sub(wasm_i32x4_shr(vq31prod2x0123, vshift), wasm_i32x4_gt(vrem2x0123, vthreshold));
 
-    const v128_t voutput_zero_point = wasm_v128_load(params->wasmsimd.output_zero_point);
+    const v128_t voutput_zero_point = wasm_v128_load(params->gemmlowp_wasmsimd.output_zero_point);
     v128_t vacc01x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc0x0123, vacc1x0123), voutput_zero_point);
     v128_t vacc22x0123 = wasm_i16x8_add_saturate(wasm_i16x8_narrow_i32x4(vacc2x0123, vacc2x0123), voutput_zero_point);
 
     v128_t vout = wasm_i8x16_narrow_i16x8(vacc01x0123, vacc22x0123);
 
-    const v128_t voutput_min = wasm_v128_load(params->wasmsimd.output_min);
+    const v128_t voutput_min = wasm_v128_load(params->gemmlowp_wasmsimd.output_min);
     vout = wasm_i8x16_max(vout, voutput_min);
 
-    const v128_t voutput_max = wasm_v128_load(params->wasmsimd.output_max);
+    const v128_t voutput_max = wasm_v128_load(params->gemmlowp_wasmsimd.output_max);
     vout = wasm_i8x16_min(vout, voutput_max);
 
     if (nc >= 4) {
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
index eb89693..510b01b 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
@@ -139,8 +139,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -165,7 +165,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -173,8 +173,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -182,15 +182,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
index a42b121..3c38797 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
@@ -141,8 +141,8 @@
     __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23);
     __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -167,7 +167,7 @@
     const __m128i vq31prod1x0123 = _mm_blend_epi16(vq31prod1x02, vq31prod1x13, 0xCC);
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -175,8 +175,8 @@
     const __m128i vrem2x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod2x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod2x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -184,15 +184,15 @@
     vacc2x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod2x0123, vshift), _mm_cmpgt_epi32(vrem2x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc22x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc2x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc22x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout, 2);
diff --git a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 907b8a7..4464592 100644
--- a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -254,7 +254,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -262,7 +262,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -278,7 +278,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -294,8 +294,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
index 8e61ea4..0348cbe 100644
--- a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -253,7 +253,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -261,7 +261,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -277,7 +277,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -293,8 +293,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
index f6a2d9f..99036ad 100644
--- a/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -275,7 +275,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -283,7 +283,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -299,7 +299,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -315,8 +315,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 76cc0ca..7158ace 100644
--- a/src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -252,7 +252,7 @@
     int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -260,7 +260,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -276,7 +276,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -291,8 +291,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 86b9b9e..694efcd 100644
--- a/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -304,7 +304,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -312,7 +312,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -328,7 +328,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -344,8 +344,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 8937afe..916ab15 100644
--- a/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -204,7 +204,7 @@
       p -= 3 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -212,7 +212,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -228,7 +228,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -244,8 +244,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/3x8c8-minmax-fp32-avx2.c b/src/qs8-igemm/gen/3x8c8-minmax-fp32-avx2.c
new file mode 100644
index 0000000..ecd6239
--- /dev/null
+++ b/src/qs8-igemm/gen/3x8c8-minmax-fp32-avx2.c
@@ -0,0 +1,227 @@
+// Auto-generated file. Do not edit!
+//   Template: src/qs8-igemm/MRx8c8-avx2.c.in
+//   Generator: tools/xngen
+//
+// Copyright 2020 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+
+#include <immintrin.h>
+
+#include <xnnpack/igemm.h>
+#include <xnnpack/intrinsics-polyfill.h>
+#include <xnnpack/math.h>
+
+
+void xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2(
+    size_t mr,
+    size_t nc,
+    size_t kc,
+    size_t ks,
+    const int8_t** restrict a,
+    const void* restrict w,
+    int8_t* restrict c,
+    size_t cm_stride,
+    size_t cn_stride,
+    size_t a_offset,
+    const int8_t* zero,
+    const union xnn_qs8_conv_minmax_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN XNN_DISABLE_MSAN
+{
+  assert(mr != 0);
+  assert(mr <= 3);
+  assert(nc != 0);
+  assert(kc != 0);
+  assert(ks != 0);
+  assert(ks % (3 * sizeof(void*)) == 0);
+  assert(a_offset % sizeof(int8_t) == 0);
+  assert(a != NULL);
+  assert(w != NULL);
+  assert(c != NULL);
+
+  kc = round_up_po2(kc, 8);
+  int8_t* c0 = c;
+  int8_t* c1 = (int8_t*) ((uintptr_t) c0 + cm_stride);
+  if XNN_UNPREDICTABLE(mr < 2) {
+    c1 = c0;
+  }
+  int8_t* c2 = (int8_t*) ((uintptr_t) c1 + cm_stride);
+  if XNN_UNPREDICTABLE(mr <= 2) {
+    c2 = c1;
+  }
+
+  do {
+    const __m128i vbias0x0 = _mm_loadu_si32(w);
+    const __m128i vbias0x1 = _mm_loadu_si32((const void*) ((uintptr_t) w + sizeof(int32_t)));
+    __m256i vacc0x01 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x0), vbias0x1, 1);
+    const __m128i vbias0x2 = _mm_loadu_si32((const void*) ((uintptr_t) w + 2 * sizeof(int32_t)));
+    const __m128i vbias0x3 = _mm_loadu_si32((const void*) ((uintptr_t) w + 3 * sizeof(int32_t)));
+    __m256i vacc0x23 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x2), vbias0x3, 1);
+    const __m128i vbias0x4 = _mm_loadu_si32((const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
+    const __m128i vbias0x5 = _mm_loadu_si32((const void*) ((uintptr_t) w + 5 * sizeof(int32_t)));
+    __m256i vacc0x45 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x4), vbias0x5, 1);
+    const __m128i vbias0x6 = _mm_loadu_si32((const void*) ((uintptr_t) w + 6 * sizeof(int32_t)));
+    const __m128i vbias0x7 = _mm_loadu_si32((const void*) ((uintptr_t) w + 7 * sizeof(int32_t)));
+    __m256i vacc0x67 = _mm256_inserti128_si256(_mm256_castsi128_si256(vbias0x6), vbias0x7, 1);
+    __m256i vacc1x01 = vacc0x01;
+    __m256i vacc1x23 = vacc0x23;
+    __m256i vacc1x45 = vacc0x45;
+    __m256i vacc1x67 = vacc0x67;
+    __m256i vacc2x01 = vacc0x01;
+    __m256i vacc2x23 = vacc0x23;
+    __m256i vacc2x45 = vacc0x45;
+    __m256i vacc2x67 = vacc0x67;
+    w = (const void*) ((uintptr_t) w + 8 * sizeof(int32_t));
+
+    size_t p = ks;
+    do {
+      const int8_t* restrict a0 = a[0];
+      if XNN_UNPREDICTABLE(a0 != zero) {
+        a0 = (const int8_t*) ((uintptr_t) a0 + a_offset);
+      }
+      const int8_t* restrict a1 = a[1];
+      if XNN_UNPREDICTABLE(a1 != zero) {
+        a1 = (const int8_t*) ((uintptr_t) a1 + a_offset);
+      }
+      const int8_t* restrict a2 = a[2];
+      if XNN_UNPREDICTABLE(a2 != zero) {
+        a2 = (const int8_t*) ((uintptr_t) a2 + a_offset);
+      }
+      a += 3;
+
+      size_t k = 0;
+      while (k < kc) {
+        const __m128i va0 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a0));
+        const __m256i vxa0 = _mm256_cvtepi8_epi16(va0);
+        a0 += 8;
+        const __m128i va1 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a1));
+        const __m256i vxa1 = _mm256_cvtepi8_epi16(va1);
+        a1 += 8;
+        const __m128i va2 = _mm_broadcastq_epi64(_mm_loadl_epi64((const __m128i*) a2));
+        const __m256i vxa2 = _mm256_cvtepi8_epi16(va2);
+        a2 += 8;
+
+        const __m128i vb01 = _mm_load_si128((const __m128i*) w);
+        const __m256i vxb01 = _mm256_cvtepi8_epi16(vb01);
+
+        vacc0x01 = _mm256_add_epi32(vacc0x01, _mm256_madd_epi16(vxa0, vxb01));
+        vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01));
+        vacc2x01 = _mm256_add_epi32(vacc2x01, _mm256_madd_epi16(vxa2, vxb01));
+        const __m128i vb23 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 16 * sizeof(int8_t)));
+        const __m256i vxb23 = _mm256_cvtepi8_epi16(vb23);
+
+        vacc0x23 = _mm256_add_epi32(vacc0x23, _mm256_madd_epi16(vxa0, vxb23));
+        vacc1x23 = _mm256_add_epi32(vacc1x23, _mm256_madd_epi16(vxa1, vxb23));
+        vacc2x23 = _mm256_add_epi32(vacc2x23, _mm256_madd_epi16(vxa2, vxb23));
+        const __m128i vb45 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 32 * sizeof(int8_t)));
+        const __m256i vxb45 = _mm256_cvtepi8_epi16(vb45);
+
+        vacc0x45 = _mm256_add_epi32(vacc0x45, _mm256_madd_epi16(vxa0, vxb45));
+        vacc1x45 = _mm256_add_epi32(vacc1x45, _mm256_madd_epi16(vxa1, vxb45));
+        vacc2x45 = _mm256_add_epi32(vacc2x45, _mm256_madd_epi16(vxa2, vxb45));
+        const __m128i vb67 = _mm_load_si128((const __m128i*) ((uintptr_t) w + 48 * sizeof(int8_t)));
+        const __m256i vxb67 = _mm256_cvtepi8_epi16(vb67);
+
+        vacc0x67 = _mm256_add_epi32(vacc0x67, _mm256_madd_epi16(vxa0, vxb67));
+        vacc1x67 = _mm256_add_epi32(vacc1x67, _mm256_madd_epi16(vxa1, vxb67));
+        vacc2x67 = _mm256_add_epi32(vacc2x67, _mm256_madd_epi16(vxa2, vxb67));
+
+        w = (const void*) ((uintptr_t) w + 64 * sizeof(int8_t));
+        k += 8 * sizeof(int8_t);
+      }
+      p -= 3 * sizeof(void*);
+    } while (p != 0);
+
+    const __m256i vacc0x0213 = _mm256_hadd_epi32(vacc0x01, vacc0x23);
+    const __m256i vacc0x4657 = _mm256_hadd_epi32(vacc0x45, vacc0x67);
+    const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
+    const __m256i vacc1x4657 = _mm256_hadd_epi32(vacc1x45, vacc1x67);
+    const __m256i vacc2x0213 = _mm256_hadd_epi32(vacc2x01, vacc2x23);
+    const __m256i vacc2x4657 = _mm256_hadd_epi32(vacc2x45, vacc2x67);
+
+    const __m256i vacc0x02461357 = _mm256_hadd_epi32(vacc0x0213, vacc0x4657);
+    const __m256i vacc1x02461357 = _mm256_hadd_epi32(vacc1x0213, vacc1x4657);
+    const __m256i vacc2x02461357 = _mm256_hadd_epi32(vacc2x0213, vacc2x4657);
+
+    const __m256i vpermute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+    __m256i vacc0x01234567 = _mm256_permutevar8x32_epi32(vacc0x02461357, vpermute_mask);
+    __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
+    __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask);
+
+    __m256 vscaled0x01234567 = _mm256_cvtepi32_ps(vacc0x01234567);
+    __m256 vscaled1x01234567 = _mm256_cvtepi32_ps(vacc1x01234567);
+    __m256 vscaled2x01234567 = _mm256_cvtepi32_ps(vacc2x01234567);
+
+    const __m256 vscale = _mm256_load_ps(params->fp32_avx2.scale);
+    vscaled0x01234567 = _mm256_mul_ps(vscaled0x01234567, vscale);
+    vscaled1x01234567 = _mm256_mul_ps(vscaled1x01234567, vscale);
+    vscaled2x01234567 = _mm256_mul_ps(vscaled2x01234567, vscale);
+
+    vacc0x01234567 = _mm256_cvtps_epi32(vscaled0x01234567);
+    vacc1x01234567 = _mm256_cvtps_epi32(vscaled1x01234567);
+    vacc2x01234567 = _mm256_cvtps_epi32(vscaled2x01234567);
+
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->fp32_avx2.output_zero_point);
+    __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
+    __m256i vacc22x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc2x01234567, vacc2x01234567), voutput_zero_point);
+
+    vacc01x01234567 = _mm256_permute4x64_epi64(vacc01x01234567, _MM_SHUFFLE(3, 1, 2, 0));
+    vacc22x01234567 = _mm256_permute4x64_epi64(vacc22x01234567, _MM_SHUFFLE(3, 1, 2, 0));
+
+    __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc22x01234567);
+
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->fp32_avx2.output_max));
+
+    __m128i vout_lo = _mm256_castsi256_si128(vout);
+    __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
+
+    if (nc >= 8) {
+      _mm_storeh_pi((__m64*) c2, _mm_castsi128_ps(vout_lo));
+      _mm_storel_epi64((__m128i*) c1, vout_hi);
+      _mm_storel_epi64((__m128i*) c0, vout_lo);
+
+      c2 = (int8_t*) ((uintptr_t) c2 + cn_stride);
+      c1 = (int8_t*) ((uintptr_t) c1 + cn_stride);
+      c0 = (int8_t*) ((uintptr_t) c0 + cn_stride);
+
+      a = (const int8_t**restrict) ((uintptr_t) a - ks);
+
+      nc -= 8;
+    } else {
+      if (nc & 4) {
+        *((uint32_t*) c2) = (uint32_t) _mm_extract_epi32(vout_lo, 2);
+        _mm_storeu_si32(c1, vout_hi);
+        _mm_storeu_si32(c0, vout_lo);
+
+        c2 += 4;
+        c1 += 4;
+        c0 += 4;
+
+        vout_lo = _mm_srli_epi64(vout_lo, 32);
+        vout_hi = _mm_srli_epi64(vout_hi, 32);
+      }
+      if (nc & 2) {
+        *((uint16_t*) c2) = (uint16_t) _mm_extract_epi16(vout_lo, 4);
+        *((uint16_t*) c1) = (uint16_t) _mm_extract_epi16(vout_hi, 0);
+        *((uint16_t*) c0) = (uint16_t) _mm_extract_epi16(vout_lo, 0);
+
+        c2 += 2;
+        c1 += 2;
+        c0 += 2;
+
+        vout_lo = _mm_srli_epi32(vout_lo, 16);
+        vout_hi = _mm_srli_epi32(vout_hi, 16);
+      }
+      if (nc & 1) {
+        *c2 = (uint8_t) _mm_extract_epi8(vout_lo, 8);
+        *c1 = (uint8_t) _mm_extract_epi8(vout_hi, 0);
+        *c0 = (int8_t) _mm_extract_epi8(vout_lo, 0);
+      }
+
+      nc = 0;
+    }
+  } while (nc != 0);
+}
diff --git a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c
index 1bcb4ff..32d5ac2 100644
--- a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-avx2.c
@@ -150,8 +150,8 @@
     __m256i vacc1x01234567 = _mm256_permutevar8x32_epi32(vacc1x02461357, vpermute_mask);
     __m256i vacc2x01234567 = _mm256_permutevar8x32_epi32(vacc2x02461357, vpermute_mask);
 
-    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->avx2.multiplier);
-    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->avx2.rounding);
+    const __m256i vmultiplier = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.multiplier);
+    const __m256i vrounding = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.rounding);
 
     const __m256i vacc0x11335577 = _mm256_shuffle_epi32(vacc0x01234567, _MM_SHUFFLE(3, 3, 1, 1));
     const __m256i vacc1x11335577 = _mm256_shuffle_epi32(vacc1x01234567, _MM_SHUFFLE(3, 3, 1, 1));
@@ -176,7 +176,7 @@
     const __m256i vq31prod1x01234567 = _mm256_blend_epi16(vq31prod1x0246, vq31prod1x1357, 0xCC);
     const __m256i vq31prod2x01234567 = _mm256_blend_epi16(vq31prod2x0246, vq31prod2x1357, 0xCC);
 
-    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->avx2.remainder_mask);
+    const __m256i vremainder_mask = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_mask);
     const __m256i vrem0x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod0x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod0x01234567));
     const __m256i vrem1x01234567 =
@@ -184,8 +184,8 @@
     const __m256i vrem2x01234567 =
       _mm256_add_epi32(_mm256_and_si256(vq31prod2x01234567, vremainder_mask), _mm256_cmpgt_epi32(_mm256_setzero_si256(), vq31prod2x01234567));
 
-    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->avx2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->avx2.shift);
+    const __m256i vremainder_threshold = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_avx2.shift);
     vacc0x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod0x01234567, vshift), _mm256_cmpgt_epi32(vrem0x01234567, vremainder_threshold));
     vacc1x01234567 =
@@ -193,7 +193,7 @@
     vacc2x01234567 =
       _mm256_sub_epi32(_mm256_sra_epi32(vq31prod2x01234567, vshift), _mm256_cmpgt_epi32(vrem2x01234567, vremainder_threshold));
 
-    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->avx2.output_zero_point);
+    const __m256i voutput_zero_point = _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_zero_point);
     __m256i vacc01x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc0x01234567, vacc1x01234567), voutput_zero_point);
     __m256i vacc22x01234567 = _mm256_adds_epi16(_mm256_packs_epi32(vacc2x01234567, vacc2x01234567), voutput_zero_point);
 
@@ -202,8 +202,8 @@
 
     __m256i vout = _mm256_packs_epi16(vacc01x01234567, vacc22x01234567);
 
-    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_min));
-    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->avx2.output_max));
+    vout = _mm256_max_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_min));
+    vout = _mm256_min_epi8(vout, _mm256_load_si256((const __m256i*) params->gemmlowp_avx2.output_max));
 
     __m128i vout_lo = _mm256_castsi256_si128(vout);
     __m128i vout_hi = _mm256_extracti128_si256(vout, 1);
diff --git a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 811d1c3..36540e9 100644
--- a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -329,7 +329,7 @@
     int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -337,7 +337,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -353,7 +353,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -368,8 +368,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
index 85e9f9f..10bbd23 100644
--- a/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -228,7 +228,7 @@
     int32x4_t vacc2x4567 = vcombine_s32(vsum2x45, vsum2x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -236,7 +236,7 @@
     vacc2x0123 = vqrdmulhq_s32(vacc2x0123, vmultiplier);
     vacc2x4567 = vqrdmulhq_s32(vacc2x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -252,7 +252,7 @@
     vacc2x0123 = vrshlq_s32(vacc2x0123, vright_shift);
     vacc2x4567 = vrshlq_s32(vacc2x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -267,8 +267,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x8_t vout2x01234567 = vqmovn_s16(vacc2x01234567);
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567 = vmax_s8(vout2x01234567, vget_low_s8(voutput_min));
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 9edcdf6..b4b0e69 100644
--- a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -465,7 +465,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -483,7 +483,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -519,7 +519,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -549,8 +549,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout3x0123456789ABCDEF = vmaxq_s8(vout3x0123456789ABCDEF, voutput_min);
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
index 6bca2cd..d0be21a 100644
--- a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -463,7 +463,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -481,7 +481,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -517,7 +517,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -547,8 +547,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout3x0123456789ABCDEF = vmaxq_s8(vout3x0123456789ABCDEF, voutput_min);
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 364a7df..bbeb566 100644
--- a/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -543,7 +543,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -561,7 +561,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -597,7 +597,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -627,8 +627,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout3x0123456789ABCDEF = vmaxq_s8(vout3x0123456789ABCDEF, voutput_min);
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
index af03024..141c850 100644
--- a/src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -529,7 +529,7 @@
     int32x4_t vacc3xCDEF = vcombine_s32(vsum3xCD, vsum3xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -547,7 +547,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -583,7 +583,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -612,8 +612,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout3x0123456789ABCDEF = vmaxq_s8(vout3x0123456789ABCDEF, voutput_min);
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 06e46b5..d6f1631 100644
--- a/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -616,7 +616,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -634,7 +634,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -670,7 +670,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -700,8 +700,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout3x0123456789ABCDEF = vmaxq_s8(vout3x0123456789ABCDEF, voutput_min);
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 0d1d136..deed830 100644
--- a/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -378,7 +378,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -396,7 +396,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -432,7 +432,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -462,8 +462,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout3x0123456789ABCDEF = vmaxq_s8(vout3x0123456789ABCDEF, voutput_min);
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-neondot.c
index ac21adb..984d364 100644
--- a/src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/4x16c4-minmax-gemmlowp-neondot.c
@@ -184,7 +184,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -202,7 +202,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -238,7 +238,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -268,8 +268,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout3x0123456789ABCDEF = vmaxq_s8(vout3x0123456789ABCDEF, voutput_min);
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
index 2d928f5..9974e69 100644
--- a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
@@ -56,14 +56,14 @@
 
   const __mmask16 vbias_mask = _cvtu32_mask16(0x1111);
   const __mmask16 vblend_mask = _cvtu32_mask16(0xAAAA);
-  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.multiplier));
-  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.rounding));
-  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));
-  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold));
-  const __m128i vshift = _mm_load_si128((const __m128i*) params->sse2.shift);
-  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_zero_point));
-  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_min));
-  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->sse4.output_max));
+  const __m512i vmultiplier = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier));
+  const __m512i vrounding = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding));
+  const __m512i vremainder_mask = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask));
+  const __m512i vremainder_threshold = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold));
+  const __m128i vshift = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.shift);
+  const __m512i voutput_zero_point = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point));
+  const __m512i voutput_min = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+  const __m512i voutput_max = _mm512_broadcast_i32x4(_mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
   do {
     __m512i vacc0x0123 = _mm512_maskz_expandloadu_epi32(vbias_mask, w);
     __m512i vacc0x4567 = _mm512_maskz_expandloadu_epi32(vbias_mask, (const void*) ((uintptr_t) w + 4 * sizeof(int32_t)));
diff --git a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
index af30a3f..2f88c06 100644
--- a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -704,7 +704,7 @@
     int32x4_t vacc3xCDEF = vcombine_s32(vsum3xCD, vsum3xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -722,7 +722,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -758,7 +758,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -787,8 +787,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout3x0123456789ABCDEF = vmaxq_s8(vout3x0123456789ABCDEF, voutput_min);
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
index 35532ef..e255872 100644
--- a/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -465,7 +465,7 @@
     int32x4_t vacc3xCDEF = vcombine_s32(vsum3xCD, vsum3xEF );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -483,7 +483,7 @@
     vacc3x89AB = vqrdmulhq_s32(vacc3x89AB, vmultiplier);
     vacc3xCDEF = vqrdmulhq_s32(vacc3xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -519,7 +519,7 @@
     vacc3x89AB = vrshlq_s32(vacc3x89AB, vright_shift);
     vacc3xCDEF = vrshlq_s32(vacc3xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -548,8 +548,8 @@
     int8x16_t vout2x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc2x89ABCDEF));
     int8x16_t vout3x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc3x01234567), vqmovn_s16(vacc3x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout3x0123456789ABCDEF = vmaxq_s8(vout3x0123456789ABCDEF, voutput_min);
     vout2x0123456789ABCDEF = vmaxq_s8(vout2x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/4x2-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/4x2-minmax-gemmlowp-scalar.c
index e6a7616..ffa8fbd 100644
--- a/src/qs8-igemm/gen/4x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/4x2-minmax-gemmlowp-scalar.c
@@ -111,7 +111,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct1x0 = (int64_t) vacc1x0 * (int64_t) vmultiplier;
@@ -131,7 +131,7 @@
     const int32_t vq31product3x0 = (int32_t) (uint32_t) ((uint64_t) (vproduct3x0 + vq31rounding) >> 31);
     const int32_t vq31product3x1 = (int32_t) (uint32_t) ((uint64_t) (vproduct3x1 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder1x0 = (vq31product1x0 & vremainder_mask) - (int32_t) (vq31product1x0 < 0);
@@ -141,8 +141,8 @@
     const int32_t vremainder3x0 = (vq31product3x0 & vremainder_mask) - (int32_t) (vq31product3x0 < 0);
     const int32_t vremainder3x1 = (vq31product3x1 & vremainder_mask) - (int32_t) (vq31product3x1 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout1x0 = asr_s32(vq31product1x0, vshift) + (int32_t) (vremainder1x0 > vremainder_threshold);
@@ -152,7 +152,7 @@
     int32_t vout3x0 = asr_s32(vq31product3x0, vshift) + (int32_t) (vremainder3x0 > vremainder_threshold);
     int32_t vout3x1 = asr_s32(vq31product3x1, vshift) + (int32_t) (vremainder3x1 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout1x0 = math_max_s32(vout1x0, vout_min);
@@ -162,7 +162,7 @@
     vout3x0 = math_max_s32(vout3x0, vout_min);
     vout3x1 = math_max_s32(vout3x1, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout1x0 = math_min_s32(vout1x0, vout_max);
@@ -172,7 +172,7 @@
     vout3x0 = math_min_s32(vout3x0, vout_max);
     vout3x1 = math_min_s32(vout3x1, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout1x0 += voutput_zero_point;
diff --git a/src/qs8-igemm/gen/4x4-minmax-gemmlowp-scalar.c b/src/qs8-igemm/gen/4x4-minmax-gemmlowp-scalar.c
index 3d6256d..82f3bcc 100644
--- a/src/qs8-igemm/gen/4x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-igemm/gen/4x4-minmax-gemmlowp-scalar.c
@@ -129,7 +129,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     const int64_t vproduct0x0 = (int64_t) vacc0x0 * (int64_t) vmultiplier;
     const int64_t vproduct0x1 = (int64_t) vacc0x1 * (int64_t) vmultiplier;
     const int64_t vproduct0x2 = (int64_t) vacc0x2 * (int64_t) vmultiplier;
@@ -165,7 +165,7 @@
     const int32_t vq31product3x2 = (int32_t) (uint32_t) ((uint64_t) (vproduct3x2 + vq31rounding) >> 31);
     const int32_t vq31product3x3 = (int32_t) (uint32_t) ((uint64_t) (vproduct3x3 + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     const int32_t vremainder0x0 = (vq31product0x0 & vremainder_mask) - (int32_t) (vq31product0x0 < 0);
     const int32_t vremainder0x1 = (vq31product0x1 & vremainder_mask) - (int32_t) (vq31product0x1 < 0);
     const int32_t vremainder0x2 = (vq31product0x2 & vremainder_mask) - (int32_t) (vq31product0x2 < 0);
@@ -183,8 +183,8 @@
     const int32_t vremainder3x2 = (vq31product3x2 & vremainder_mask) - (int32_t) (vq31product3x2 < 0);
     const int32_t vremainder3x3 = (vq31product3x3 & vremainder_mask) - (int32_t) (vq31product3x3 < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     int32_t vout0x0 = asr_s32(vq31product0x0, vshift) + (int32_t) (vremainder0x0 > vremainder_threshold);
     int32_t vout0x1 = asr_s32(vq31product0x1, vshift) + (int32_t) (vremainder0x1 > vremainder_threshold);
     int32_t vout0x2 = asr_s32(vq31product0x2, vshift) + (int32_t) (vremainder0x2 > vremainder_threshold);
@@ -202,7 +202,7 @@
     int32_t vout3x2 = asr_s32(vq31product3x2, vshift) + (int32_t) (vremainder3x2 > vremainder_threshold);
     int32_t vout3x3 = asr_s32(vq31product3x3, vshift) + (int32_t) (vremainder3x3 > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     vout0x0 = math_max_s32(vout0x0, vout_min);
     vout0x1 = math_max_s32(vout0x1, vout_min);
     vout0x2 = math_max_s32(vout0x2, vout_min);
@@ -220,7 +220,7 @@
     vout3x2 = math_max_s32(vout3x2, vout_min);
     vout3x3 = math_max_s32(vout3x3, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     vout0x0 = math_min_s32(vout0x0, vout_max);
     vout0x1 = math_min_s32(vout0x1, vout_max);
     vout0x2 = math_min_s32(vout0x2, vout_max);
@@ -238,7 +238,7 @@
     vout3x2 = math_min_s32(vout3x2, vout_max);
     vout3x3 = math_min_s32(vout3x3, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     vout0x0 += voutput_zero_point;
     vout0x1 += voutput_zero_point;
     vout0x2 += voutput_zero_point;
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
index a1df771..de5da17 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
@@ -205,8 +205,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -237,7 +237,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -247,8 +247,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -258,15 +258,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c3) = (uint32_t) _mm_extract_epi32(vout, 3);
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
index 7011ea7..d4f6be1 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
@@ -205,8 +205,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -237,7 +237,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -247,8 +247,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -258,15 +258,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c3) = (uint32_t) _mm_extract_epi32(vout, 3);
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
index 93acf70..3c7af53 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -205,8 +205,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -277,7 +277,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -287,8 +287,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -298,12 +298,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
index 03607d8..e40ce90 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -205,8 +205,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -277,7 +277,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -287,8 +287,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -298,12 +298,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
index ab00486..e577a64 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -205,8 +205,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -237,7 +237,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -247,8 +247,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -258,15 +258,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c3) = (uint32_t) _mm_extract_epi32(vout, 3);
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
index 5530e47..2ab9bc8 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -205,8 +205,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -237,7 +237,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -247,8 +247,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -258,15 +258,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c3) = (uint32_t) _mm_extract_epi32(vout, 3);
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
index 3c7ba08..f844b67 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -205,8 +205,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -277,7 +277,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -287,8 +287,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -298,12 +298,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
index f261a0c..d8765d9 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -205,8 +205,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse2.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.rounding);
 
     const __m128i vnmask0x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc0x0123);
     const __m128i vnmask1x0123 = _mm_cmpgt_epi32(_mm_setzero_si128(), vacc1x0123);
@@ -277,7 +277,7 @@
     const __m128i vq31prod2x0123 = _mm_shuffle_epi32(vq31prod2x0213, _MM_SHUFFLE(3, 1, 2, 0));
     const __m128i vq31prod3x0123 = _mm_shuffle_epi32(vq31prod3x0213, _MM_SHUFFLE(3, 1, 2, 0));
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -287,8 +287,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse2.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse2.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -298,12 +298,12 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
-    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);
-    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);
+    const __m128i voutput_min = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_min);
+    const __m128i voutput_max = _mm_load_si128((const __m128i*) params->gemmlowp_sse2.output_max);
     vacc01x0123 = _mm_min_epi16(_mm_max_epi16(vacc01x0123, voutput_min), voutput_max);
     vacc23x0123 = _mm_min_epi16(_mm_max_epi16(vacc23x0123, voutput_min), voutput_max);
 
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
index 3eb4e00..61b2117 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
@@ -210,8 +210,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -242,7 +242,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -252,8 +252,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -263,15 +263,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c3) = (uint32_t) _mm_extract_epi32(vout, 3);
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
index c610c59..fc69a9c 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
@@ -210,8 +210,8 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->sse4.multiplier);
-    const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse4.rounding);
+    const __m128i vmultiplier = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.multiplier);
+    const __m128i vrounding = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.rounding);
 
     const __m128i vacc0x1133 = _mm_shuffle_epi32(vacc0x0123, _MM_SHUFFLE(3, 3, 1, 1));
     const __m128i vacc1x1133 = _mm_shuffle_epi32(vacc1x0123, _MM_SHUFFLE(3, 3, 1, 1));
@@ -242,7 +242,7 @@
     const __m128i vq31prod2x0123 = _mm_blend_epi16(vq31prod2x02, vq31prod2x13, 0xCC);
     const __m128i vq31prod3x0123 = _mm_blend_epi16(vq31prod3x02, vq31prod3x13, 0xCC);
 
-    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse4.remainder_mask);
+    const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_mask);
     const __m128i vrem0x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod0x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod0x0123));
     const __m128i vrem1x0123 =
@@ -252,8 +252,8 @@
     const __m128i vrem3x0123 =
       _mm_add_epi32(_mm_and_si128(vq31prod3x0123, vremainder_mask), _mm_cmpgt_epi32(_mm_setzero_si128(), vq31prod3x0123));
 
-    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse4.remainder_threshold);
-    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->sse4.shift);
+    const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.remainder_threshold);
+    const __m128i vshift = _mm_loadl_epi64((const __m128i*) params->gemmlowp_sse4.shift);
     vacc0x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod0x0123, vshift), _mm_cmpgt_epi32(vrem0x0123, vremainder_threshold));
     vacc1x0123 =
@@ -263,15 +263,15 @@
     vacc3x0123 =
       _mm_sub_epi32(_mm_sra_epi32(vq31prod3x0123, vshift), _mm_cmpgt_epi32(vrem3x0123, vremainder_threshold));
 
-    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse4.output_zero_point);
+    const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_zero_point);
     __m128i vacc01x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc0x0123, vacc1x0123), voutput_zero_point);
     __m128i vacc23x0123 = _mm_adds_epi16(_mm_packs_epi32(vacc2x0123, vacc3x0123), voutput_zero_point);
 
 
     __m128i vout = _mm_packs_epi16(vacc01x0123, vacc23x0123);
 
-    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_min));
-    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->sse4.output_max));
+    vout = _mm_max_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_min));
+    vout = _mm_min_epi8(vout, _mm_load_si128((const __m128i*) params->gemmlowp_sse4.output_max));
 
     if (nc >= 4) {
       *((uint32_t*) c3) = (uint32_t) _mm_extract_epi32(vout, 3);
diff --git a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index d7cd248..e0b5d5c 100644
--- a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -298,7 +298,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -308,7 +308,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -328,7 +328,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -346,8 +346,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
index 3d94c7b..89b6e23 100644
--- a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -297,7 +297,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -307,7 +307,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -327,7 +327,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -345,8 +345,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
index 8074e57..6af314f 100644
--- a/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-igemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -332,7 +332,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -342,7 +342,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -362,7 +362,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -380,8 +380,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 186e0a9..afef31c 100644
--- a/src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -313,7 +313,7 @@
     int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -323,7 +323,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -343,7 +343,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -360,8 +360,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 5e717ae..7c12248 100644
--- a/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -370,7 +370,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -380,7 +380,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -400,7 +400,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -418,8 +418,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 513cee6..405667f 100644
--- a/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-igemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -244,7 +244,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -254,7 +254,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -274,7 +274,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -292,8 +292,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/4x8c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/4x8c4-minmax-gemmlowp-neondot.c
index 1f75974..3879c41 100644
--- a/src/qs8-igemm/gen/4x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/4x8c4-minmax-gemmlowp-neondot.c
@@ -146,7 +146,7 @@
       p -= 4 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -156,7 +156,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -176,7 +176,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -194,8 +194,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 1495f51..fb38be2 100644
--- a/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -408,7 +408,7 @@
     int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -418,7 +418,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -438,7 +438,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -455,8 +455,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
index 97817c4..efd7ec4 100644
--- a/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-igemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -281,7 +281,7 @@
     int32x4_t vacc3x4567 = vcombine_s32(vsum3x45, vsum3x67 );
 #endif
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -291,7 +291,7 @@
     vacc3x0123 = vqrdmulhq_s32(vacc3x0123, vmultiplier);
     vacc3x4567 = vqrdmulhq_s32(vacc3x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -311,7 +311,7 @@
     vacc3x0123 = vrshlq_s32(vacc3x0123, vright_shift);
     vacc3x4567 = vrshlq_s32(vacc3x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -328,8 +328,8 @@
     int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc1x01234567));
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
     vout0x01234567_1x01234567 = vmaxq_s8(vout0x01234567_1x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 91dd3df..a766833 100644
--- a/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -617,7 +617,7 @@
       p -= 6 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -643,7 +643,7 @@
     vacc5x89AB = vqrdmulhq_s32(vacc5x89AB, vmultiplier);
     vacc5xCDEF = vqrdmulhq_s32(vacc5xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -695,7 +695,7 @@
     vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_shift);
     vacc5xCDEF = vrshlq_s32(vacc5xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -737,8 +737,8 @@
     int8x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF));
     int8x16_t vout5x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc5x01234567), vqmovn_s16(vacc5x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout5x0123456789ABCDEF = vmaxq_s8(vout5x0123456789ABCDEF, voutput_min);
     vout4x0123456789ABCDEF = vmaxq_s8(vout4x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
index c645ef6..e03346b 100644
--- a/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -615,7 +615,7 @@
       p -= 6 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -641,7 +641,7 @@
     vacc5x89AB = vqrdmulhq_s32(vacc5x89AB, vmultiplier);
     vacc5xCDEF = vqrdmulhq_s32(vacc5xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -693,7 +693,7 @@
     vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_shift);
     vacc5xCDEF = vrshlq_s32(vacc5xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -735,8 +735,8 @@
     int8x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF));
     int8x16_t vout5x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc5x01234567), vqmovn_s16(vacc5x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout5x0123456789ABCDEF = vmaxq_s8(vout5x0123456789ABCDEF, voutput_min);
     vout4x0123456789ABCDEF = vmaxq_s8(vout4x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/6x16c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/6x16c4-minmax-gemmlowp-neondot.c
index 7c9b9da..550da3c 100644
--- a/src/qs8-igemm/gen/6x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/6x16c4-minmax-gemmlowp-neondot.c
@@ -236,7 +236,7 @@
       p -= 6 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -262,7 +262,7 @@
     vacc5x89AB = vqrdmulhq_s32(vacc5x89AB, vmultiplier);
     vacc5xCDEF = vqrdmulhq_s32(vacc5xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -314,7 +314,7 @@
     vacc5x89AB = vrshlq_s32(vacc5x89AB, vright_shift);
     vacc5xCDEF = vrshlq_s32(vacc5xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -356,8 +356,8 @@
     int8x16_t vout4x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc4x89ABCDEF));
     int8x16_t vout5x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc5x01234567), vqmovn_s16(vacc5x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout5x0123456789ABCDEF = vmaxq_s8(vout5x0123456789ABCDEF, voutput_min);
     vout4x0123456789ABCDEF = vmaxq_s8(vout4x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 26da7be..be68e94 100644
--- a/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -386,7 +386,7 @@
       p -= 6 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -400,7 +400,7 @@
     vacc5x0123 = vqrdmulhq_s32(vacc5x0123, vmultiplier);
     vacc5x4567 = vqrdmulhq_s32(vacc5x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -428,7 +428,7 @@
     vacc5x0123 = vrshlq_s32(vacc5x0123, vright_shift);
     vacc5x4567 = vrshlq_s32(vacc5x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -452,8 +452,8 @@
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
     int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout4x01234567_5x01234567 = vmaxq_s8(vout4x01234567_5x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
index df3e24c..565e3c1 100644
--- a/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-igemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -385,7 +385,7 @@
       p -= 6 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -399,7 +399,7 @@
     vacc5x0123 = vqrdmulhq_s32(vacc5x0123, vmultiplier);
     vacc5x4567 = vqrdmulhq_s32(vacc5x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -427,7 +427,7 @@
     vacc5x0123 = vrshlq_s32(vacc5x0123, vright_shift);
     vacc5x4567 = vrshlq_s32(vacc5x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -451,8 +451,8 @@
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
     int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout4x01234567_5x01234567 = vmaxq_s8(vout4x01234567_5x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/6x8c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/6x8c4-minmax-gemmlowp-neondot.c
index 1e4aab9..f778923 100644
--- a/src/qs8-igemm/gen/6x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/6x8c4-minmax-gemmlowp-neondot.c
@@ -182,7 +182,7 @@
       p -= 6 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -196,7 +196,7 @@
     vacc5x0123 = vqrdmulhq_s32(vacc5x0123, vmultiplier);
     vacc5x4567 = vqrdmulhq_s32(vacc5x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -224,7 +224,7 @@
     vacc5x0123 = vrshlq_s32(vacc5x0123, vright_shift);
     vacc5x4567 = vrshlq_s32(vacc5x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -248,8 +248,8 @@
     int8x16_t vout2x01234567_3x01234567 = vcombine_s8(vqmovn_s16(vacc2x01234567), vqmovn_s16(vacc3x01234567));
     int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout4x01234567_5x01234567 = vmaxq_s8(vout4x01234567_5x01234567, voutput_min);
     vout2x01234567_3x01234567 = vmaxq_s8(vout2x01234567_3x01234567, voutput_min);
diff --git a/src/qs8-igemm/gen/8x16c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/8x16c4-minmax-gemmlowp-neondot.c
index e5c1331..3970c26 100644
--- a/src/qs8-igemm/gen/8x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/8x16c4-minmax-gemmlowp-neondot.c
@@ -288,7 +288,7 @@
       p -= 8 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc0x89AB = vqrdmulhq_s32(vacc0x89AB, vmultiplier);
@@ -322,7 +322,7 @@
     vacc7x89AB = vqrdmulhq_s32(vacc7x89AB, vmultiplier);
     vacc7xCDEF = vqrdmulhq_s32(vacc7xCDEF, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -390,7 +390,7 @@
     vacc7x89AB = vrshlq_s32(vacc7x89AB, vright_shift);
     vacc7xCDEF = vrshlq_s32(vacc7xCDEF, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc0x89ABCDEF = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x89AB), vacc0xCDEF), voutput_zero_point);
@@ -444,8 +444,8 @@
     int8x16_t vout6x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc6x89ABCDEF));
     int8x16_t vout7x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc7x01234567), vqmovn_s16(vacc7x89ABCDEF));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout7x0123456789ABCDEF = vmaxq_s8(vout7x0123456789ABCDEF, voutput_min);
     vout6x0123456789ABCDEF = vmaxq_s8(vout6x0123456789ABCDEF, voutput_min);
diff --git a/src/qs8-igemm/gen/8x8c4-minmax-gemmlowp-neondot.c b/src/qs8-igemm/gen/8x8c4-minmax-gemmlowp-neondot.c
index bc697f1..0b35cfc 100644
--- a/src/qs8-igemm/gen/8x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-igemm/gen/8x8c4-minmax-gemmlowp-neondot.c
@@ -218,7 +218,7 @@
       p -= 8 * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     vacc0x0123 = vqrdmulhq_s32(vacc0x0123, vmultiplier);
     vacc0x4567 = vqrdmulhq_s32(vacc0x4567, vmultiplier);
     vacc1x0123 = vqrdmulhq_s32(vacc1x0123, vmultiplier);
@@ -236,7 +236,7 @@
     vacc7x0123 = vqrdmulhq_s32(vacc7x0123, vmultiplier);
     vacc7x4567 = vqrdmulhq_s32(vacc7x4567, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     vacc0x0123 = vsraq_n_s32(vacc0x0123, vbicq_s32(vacc0x0123, vzero_shift_mask), 31);
     vacc0x4567 = vsraq_n_s32(vacc0x4567, vbicq_s32(vacc0x4567, vzero_shift_mask), 31);
@@ -272,7 +272,7 @@
     vacc7x0123 = vrshlq_s32(vacc7x0123, vright_shift);
     vacc7x4567 = vrshlq_s32(vacc7x4567, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     const int16x8_t vacc0x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc0x0123), vacc0x4567), voutput_zero_point);
     const int16x8_t vacc1x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc1x0123), vacc1x4567), voutput_zero_point);
@@ -302,8 +302,8 @@
     int8x16_t vout4x01234567_5x01234567 = vcombine_s8(vqmovn_s16(vacc4x01234567), vqmovn_s16(vacc5x01234567));
     int8x16_t vout6x01234567_7x01234567 = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc7x01234567));
 #endif
-    const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-    const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+    const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+    const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     vout6x01234567_7x01234567 = vmaxq_s8(vout6x01234567_7x01234567, voutput_min);
     vout4x01234567_5x01234567 = vmaxq_s8(vout4x01234567_5x01234567, voutput_min);
diff --git a/src/qs8-igemm/neon-mlal-lane.c.in b/src/qs8-igemm/neon-mlal-lane.c.in
index 33a6b16..5527bc2 100644
--- a/src/qs8-igemm/neon-mlal-lane.c.in
+++ b/src/qs8-igemm/neon-mlal-lane.c.in
@@ -184,12 +184,12 @@
       p -= ${MR} * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -199,7 +199,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -228,11 +228,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in reversed(range(MR)):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-igemm/neon-mull-addw-dup.c.in b/src/qs8-igemm/neon-mull-addw-dup.c.in
index 65a3d13..145de0b 100644
--- a/src/qs8-igemm/neon-mull-addw-dup.c.in
+++ b/src/qs8-igemm/neon-mull-addw-dup.c.in
@@ -169,12 +169,12 @@
       p -= ${MR} * sizeof(void*);
     } while (p != 0);
 
-    const int32x4_t vmultiplier = vld1q_dup_s32(&params->neon.multiplier);
+    const int32x4_t vmultiplier = vld1q_dup_s32(&params->gemmlowp_neon.multiplier);
     $for M in range(MR):
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vqrdmulhq_s32(vacc${M}x${ABC[N:N+4]}, vmultiplier);
 
-    const int32x4_t vright_shift = vld1q_dup_s32(&params->neon.right_shift);
+    const int32x4_t vright_shift = vld1q_dup_s32(&params->gemmlowp_neon.right_shift);
     const int32x4_t vzero_shift_mask = vreinterpretq_s32_u32(vceqq_s32(vright_shift, vmovq_n_s32(0)));
     $for M in range(MR):
       $for N in range(0, NR, 4):
@@ -184,7 +184,7 @@
       $for N in range(0, NR, 4):
         vacc${M}x${ABC[N:N+4]} = vrshlq_s32(vacc${M}x${ABC[N:N+4]}, vright_shift);
 
-    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->neon.output_zero_point);
+    const int16x8_t voutput_zero_point = vld1q_dup_s16(&params->gemmlowp_neon.output_zero_point);
 #if XNN_ARCH_ARM64
     $for M in range(MR):
       $for N in range(0, NR, 8):
@@ -213,11 +213,11 @@
           int8x8_t vout${M}x${ABC[N:N+8]} = vqmovn_s16(vacc${M}x${ABC[N:N+8]});
 #endif
     $if NR == 8 and MR == 1:
-      const int8x8_t voutput_min = vld1_dup_s8(&params->neon.output_min);
-      const int8x8_t voutput_max = vld1_dup_s8(&params->neon.output_max);
+      const int8x8_t voutput_min = vld1_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x8_t voutput_max = vld1_dup_s8(&params->gemmlowp_neon.output_max);
     $else:
-      const int8x16_t voutput_min = vld1q_dup_s8(&params->neon.output_min);
-      const int8x16_t voutput_max = vld1q_dup_s8(&params->neon.output_max);
+      const int8x16_t voutput_min = vld1q_dup_s8(&params->gemmlowp_neon.output_min);
+      const int8x16_t voutput_max = vld1q_dup_s8(&params->gemmlowp_neon.output_max);
 
     $for M in reversed(range(MR)):
       $for N in range(0, NR, 16):
diff --git a/src/qs8-igemm/scalar.c.in b/src/qs8-igemm/scalar.c.in
index 349de59..f26d11a 100644
--- a/src/qs8-igemm/scalar.c.in
+++ b/src/qs8-igemm/scalar.c.in
@@ -85,7 +85,7 @@
       p -= ${MR} * sizeof(void*);
     } while (p != 0);
 
-    const int32_t vmultiplier = params->scalar.multiplier;
+    const int32_t vmultiplier = params->gemmlowp_scalar.multiplier;
     $for M in range(MR):
       $for N in range(NR):
         const int64_t vproduct${M}x${N} = (int64_t) vacc${M}x${N} * (int64_t) vmultiplier;
@@ -95,28 +95,28 @@
       $for N in range(NR):
         const int32_t vq31product${M}x${N} = (int32_t) (uint32_t) ((uint64_t) (vproduct${M}x${N} + vq31rounding) >> 31);
 
-    const int32_t vremainder_mask = params->scalar.remainder_mask;
+    const int32_t vremainder_mask = params->gemmlowp_scalar.remainder_mask;
     $for M in range(MR):
       $for N in range(NR):
         const int32_t vremainder${M}x${N} = (vq31product${M}x${N} & vremainder_mask) - (int32_t) (vq31product${M}x${N} < 0);
 
-    const uint32_t vshift = params->scalar.shift;
-    const int32_t vremainder_threshold = params->scalar.remainder_threshold;
+    const uint32_t vshift = params->gemmlowp_scalar.shift;
+    const int32_t vremainder_threshold = params->gemmlowp_scalar.remainder_threshold;
     $for M in range(MR):
       $for N in range(NR):
         int32_t vout${M}x${N} = asr_s32(vq31product${M}x${N}, vshift) + (int32_t) (vremainder${M}x${N} > vremainder_threshold);
 
-    const int32_t vout_min = params->scalar.output_min_less_zero_point;
+    const int32_t vout_min = params->gemmlowp_scalar.output_min_less_zero_point;
     $for M in range(MR):
       $for N in range(NR):
         vout${M}x${N} = math_max_s32(vout${M}x${N}, vout_min);
 
-    const int32_t vout_max = params->scalar.output_max_less_zero_point;
+    const int32_t vout_max = params->gemmlowp_scalar.output_max_less_zero_point;
     $for M in range(MR):
       $for N in range(NR):
         vout${M}x${N} = math_min_s32(vout${M}x${N}, vout_max);
 
-    const int32_t voutput_zero_point = params->scalar.output_zero_point;
+    const int32_t voutput_zero_point = params->gemmlowp_scalar.output_zero_point;
     $for M in range(MR):
       $for N in range(NR):
         vout${M}x${N} += voutput_zero_point;
diff --git a/src/xnnpack/dwconv.h b/src/xnnpack/dwconv.h
index 0b9f641..51b55bf 100644
--- a/src/xnnpack/dwconv.h
+++ b/src/xnnpack/dwconv.h
@@ -314,6 +314,9 @@
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16)
 
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16)
+
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32)
@@ -331,6 +334,11 @@
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32)
 
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32)
+
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32)
 
@@ -366,6 +374,9 @@
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16)
 
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16)
+
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32)
@@ -383,6 +394,11 @@
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32)
 
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32)
+DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32)
+
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32)
 DECLARE_QS8_DWCONV_MINMAX_UNIPASS_UKERNEL_FUNCTION(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32)
 
diff --git a/src/xnnpack/gemm.h b/src/xnnpack/gemm.h
index 0f79913..a1be4c0 100644
--- a/src/xnnpack/gemm.h
+++ b/src/xnnpack/gemm.h
@@ -726,6 +726,10 @@
 DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2)
 DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2)
 
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2)
+DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2)
+
 DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx)
 DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx)
 DECLARE_QS8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx)
diff --git a/src/xnnpack/igemm.h b/src/xnnpack/igemm.h
index 6071bb9..d032731 100644
--- a/src/xnnpack/igemm.h
+++ b/src/xnnpack/igemm.h
@@ -529,6 +529,10 @@
 DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2)
 DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2)
 
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2)
+DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2)
+
 DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx)
 DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx)
 DECLARE_QS8_IGEMM_MINMAX_UKERNEL_FUNCTION(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx)
diff --git a/src/xnnpack/params-init.h b/src/xnnpack/params-init.h
index a245112..a1410cd 100644
--- a/src/xnnpack/params-init.h
+++ b/src/xnnpack/params-init.h
@@ -147,7 +147,7 @@
 }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
-static inline void xnn_init_qs8_conv_minmax_scalar_params(
+static inline void xnn_init_qs8_conv_minmax_gemmlowp_scalar_params(
   union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
   float scale,
   int8_t output_zero_point,
@@ -170,17 +170,17 @@
   const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
   const uint32_t remainder_threshold = remainder_mask >> 1;
 
-  params->scalar.multiplier = multiplier;
-  params->scalar.remainder_mask = (int32_t) remainder_mask;
-  params->scalar.remainder_threshold = (int32_t) remainder_threshold;
-  params->scalar.shift = (uint32_t) shift;
-  params->scalar.output_min_less_zero_point = (int32_t) output_min - (int32_t) output_zero_point;
-  params->scalar.output_max_less_zero_point = (int32_t) output_max - (int32_t) output_zero_point;
-  params->scalar.output_zero_point = (int32_t) output_zero_point;
+  params->gemmlowp_scalar.multiplier = multiplier;
+  params->gemmlowp_scalar.remainder_mask = (int32_t) remainder_mask;
+  params->gemmlowp_scalar.remainder_threshold = (int32_t) remainder_threshold;
+  params->gemmlowp_scalar.shift = (uint32_t) shift;
+  params->gemmlowp_scalar.output_min_less_zero_point = (int32_t) output_min - (int32_t) output_zero_point;
+  params->gemmlowp_scalar.output_max_less_zero_point = (int32_t) output_max - (int32_t) output_zero_point;
+  params->gemmlowp_scalar.output_zero_point = (int32_t) output_zero_point;
 }
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-static inline void xnn_init_qs8_conv_minmax_sse2_params(
+static inline void xnn_init_qs8_conv_minmax_gemmlowp_sse2_params(
   union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
   float scale,
   int8_t output_zero_point,
@@ -202,30 +202,30 @@
 
   const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
   const uint32_t remainder_threshold = remainder_mask >> 1;
-  params->sse2.multiplier[0] = multiplier;
-  params->sse2.multiplier[1] = multiplier;
-  params->sse2.multiplier[2] = multiplier;
-  params->sse2.multiplier[3] = multiplier;
-  params->sse2.rounding[0] = UINT64_C(0x40000000);
-  params->sse2.rounding[1] = UINT64_C(0x40000000);
-  params->sse2.remainder_mask[0] = (int32_t) remainder_mask;
-  params->sse2.remainder_mask[1] = (int32_t) remainder_mask;
-  params->sse2.remainder_mask[2] = (int32_t) remainder_mask;
-  params->sse2.remainder_mask[3] = (int32_t) remainder_mask;
-  params->sse2.remainder_threshold[0] = (int32_t) remainder_threshold;
-  params->sse2.remainder_threshold[1] = (int32_t) remainder_threshold;
-  params->sse2.remainder_threshold[2] = (int32_t) remainder_threshold;
-  params->sse2.remainder_threshold[3] = (int32_t) remainder_threshold;
-  params->sse2.shift[0] = (uint64_t) (uint32_t) shift;
-  params->sse2.shift[1] = (uint64_t) (uint32_t) shift;
+  params->gemmlowp_sse2.multiplier[0] = multiplier;
+  params->gemmlowp_sse2.multiplier[1] = multiplier;
+  params->gemmlowp_sse2.multiplier[2] = multiplier;
+  params->gemmlowp_sse2.multiplier[3] = multiplier;
+  params->gemmlowp_sse2.rounding[0] = UINT64_C(0x40000000);
+  params->gemmlowp_sse2.rounding[1] = UINT64_C(0x40000000);
+  params->gemmlowp_sse2.remainder_mask[0] = (int32_t) remainder_mask;
+  params->gemmlowp_sse2.remainder_mask[1] = (int32_t) remainder_mask;
+  params->gemmlowp_sse2.remainder_mask[2] = (int32_t) remainder_mask;
+  params->gemmlowp_sse2.remainder_mask[3] = (int32_t) remainder_mask;
+  params->gemmlowp_sse2.remainder_threshold[0] = (int32_t) remainder_threshold;
+  params->gemmlowp_sse2.remainder_threshold[1] = (int32_t) remainder_threshold;
+  params->gemmlowp_sse2.remainder_threshold[2] = (int32_t) remainder_threshold;
+  params->gemmlowp_sse2.remainder_threshold[3] = (int32_t) remainder_threshold;
+  params->gemmlowp_sse2.shift[0] = (uint64_t) (uint32_t) shift;
+  params->gemmlowp_sse2.shift[1] = (uint64_t) (uint32_t) shift;
   for (uint32_t i = 0; i < 8; i++) {
-    params->sse2.output_zero_point[i] = (int16_t) output_zero_point;
-    params->sse2.output_min[i] = (int16_t) output_min;
-    params->sse2.output_max[i] = (int16_t) output_max;
+    params->gemmlowp_sse2.output_zero_point[i] = (int16_t) output_zero_point;
+    params->gemmlowp_sse2.output_min[i] = (int16_t) output_min;
+    params->gemmlowp_sse2.output_max[i] = (int16_t) output_max;
   }
 }
 
-static inline void xnn_init_qs8_conv_minmax_sse4_params(
+static inline void xnn_init_qs8_conv_minmax_gemmlowp_sse4_params(
   union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
   float scale,
   int8_t output_zero_point,
@@ -247,32 +247,32 @@
 
   const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
   const uint32_t remainder_threshold = remainder_mask >> 1;
-  params->sse4.multiplier[0] = multiplier;
-  params->sse4.multiplier[1] = multiplier;
-  params->sse4.multiplier[2] = multiplier;
-  params->sse4.multiplier[3] = multiplier;
-  params->sse4.rounding[0] = UINT64_C(0x40000000);
-  params->sse4.rounding[1] = UINT64_C(0x40000000);
-  params->sse4.remainder_mask[0] = (int32_t) remainder_mask;
-  params->sse4.remainder_mask[1] = (int32_t) remainder_mask;
-  params->sse4.remainder_mask[2] = (int32_t) remainder_mask;
-  params->sse4.remainder_mask[3] = (int32_t) remainder_mask;
-  params->sse4.remainder_threshold[0] = (int32_t) remainder_threshold;
-  params->sse4.remainder_threshold[1] = (int32_t) remainder_threshold;
-  params->sse4.remainder_threshold[2] = (int32_t) remainder_threshold;
-  params->sse4.remainder_threshold[3] = (int32_t) remainder_threshold;
-  params->sse4.shift[0] = (uint64_t) (uint32_t) shift;
-  params->sse4.shift[1] = (uint64_t) (uint32_t) shift;
+  params->gemmlowp_sse4.multiplier[0] = multiplier;
+  params->gemmlowp_sse4.multiplier[1] = multiplier;
+  params->gemmlowp_sse4.multiplier[2] = multiplier;
+  params->gemmlowp_sse4.multiplier[3] = multiplier;
+  params->gemmlowp_sse4.rounding[0] = UINT64_C(0x40000000);
+  params->gemmlowp_sse4.rounding[1] = UINT64_C(0x40000000);
+  params->gemmlowp_sse4.remainder_mask[0] = (int32_t) remainder_mask;
+  params->gemmlowp_sse4.remainder_mask[1] = (int32_t) remainder_mask;
+  params->gemmlowp_sse4.remainder_mask[2] = (int32_t) remainder_mask;
+  params->gemmlowp_sse4.remainder_mask[3] = (int32_t) remainder_mask;
+  params->gemmlowp_sse4.remainder_threshold[0] = (int32_t) remainder_threshold;
+  params->gemmlowp_sse4.remainder_threshold[1] = (int32_t) remainder_threshold;
+  params->gemmlowp_sse4.remainder_threshold[2] = (int32_t) remainder_threshold;
+  params->gemmlowp_sse4.remainder_threshold[3] = (int32_t) remainder_threshold;
+  params->gemmlowp_sse4.shift[0] = (uint64_t) (uint32_t) shift;
+  params->gemmlowp_sse4.shift[1] = (uint64_t) (uint32_t) shift;
   for (uint32_t i = 0; i < 8; i++) {
-    params->sse4.output_zero_point[i] = (int16_t) output_zero_point;
+    params->gemmlowp_sse4.output_zero_point[i] = (int16_t) output_zero_point;
   }
   for (uint32_t i = 0; i < 16; i++) {
-    params->sse4.output_min[i] = output_min;
-    params->sse4.output_max[i] = output_max;
+    params->gemmlowp_sse4.output_min[i] = output_min;
+    params->gemmlowp_sse4.output_max[i] = output_max;
   }
 }
 
-static inline void xnn_init_qs8_conv_minmax_avx2_params(
+static inline void xnn_init_qs8_conv_minmax_gemmlowp_avx2_params(
   union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
   float scale,
   int8_t output_zero_point,
@@ -295,32 +295,51 @@
   const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
   const uint32_t remainder_threshold = remainder_mask >> 1;
   for (uint32_t i = 0; i < 8; i++) {
-    params->avx2.multiplier[i] = multiplier;
+    params->gemmlowp_avx2.multiplier[i] = multiplier;
   }
-  params->avx2.rounding[0] = UINT64_C(0x40000000);
-  params->avx2.rounding[1] = UINT64_C(0x40000000);
-  params->avx2.rounding[2] = UINT64_C(0x40000000);
-  params->avx2.rounding[3] = UINT64_C(0x40000000);
+  params->gemmlowp_avx2.rounding[0] = UINT64_C(0x40000000);
+  params->gemmlowp_avx2.rounding[1] = UINT64_C(0x40000000);
+  params->gemmlowp_avx2.rounding[2] = UINT64_C(0x40000000);
+  params->gemmlowp_avx2.rounding[3] = UINT64_C(0x40000000);
   for (uint32_t i = 0; i < 8; i++) {
-    params->avx2.remainder_mask[i] = (int32_t) remainder_mask;
-    params->avx2.remainder_threshold[i] = (int32_t) remainder_threshold;
+    params->gemmlowp_avx2.remainder_mask[i] = (int32_t) remainder_mask;
+    params->gemmlowp_avx2.remainder_threshold[i] = (int32_t) remainder_threshold;
   }
-  params->avx2.shift[0] = (uint64_t) (uint32_t) shift;
-  params->avx2.shift[1] = (uint64_t) (uint32_t) shift;
-  params->avx2.shift[2] = (uint64_t) (uint32_t) shift;
-  params->avx2.shift[3] = (uint64_t) (uint32_t) shift;
+  params->gemmlowp_avx2.shift[0] = (uint64_t) (uint32_t) shift;
+  params->gemmlowp_avx2.shift[1] = (uint64_t) (uint32_t) shift;
+  params->gemmlowp_avx2.shift[2] = (uint64_t) (uint32_t) shift;
+  params->gemmlowp_avx2.shift[3] = (uint64_t) (uint32_t) shift;
   for (uint32_t i = 0; i < 16; i++) {
-    params->avx2.output_zero_point[i] = (int16_t) output_zero_point;
+    params->gemmlowp_avx2.output_zero_point[i] = (int16_t) output_zero_point;
   }
   for (uint32_t i = 0; i < 32; i++) {
-    params->avx2.output_min[i] = output_min;
-    params->avx2.output_max[i] = output_max;
+    params->gemmlowp_avx2.output_min[i] = output_min;
+    params->gemmlowp_avx2.output_max[i] = output_max;
+  }
+}
+
+static inline void xnn_init_qs8_conv_minmax_fp32_avx2_params(
+  union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
+  float scale,
+  int8_t output_zero_point,
+  int8_t output_min,
+  int8_t output_max)
+{
+  for (uint32_t i = 0; i < 8; i++) {
+    params->fp32_avx2.scale[i] = scale;
+  }
+  for (uint32_t i = 0; i < 16; i++) {
+    params->fp32_avx2.output_zero_point[i] = (int16_t) output_zero_point;
+  }
+  for (uint32_t i = 0; i < 32; i++) {
+    params->fp32_avx2.output_min[i] = output_min;
+    params->fp32_avx2.output_max[i] = output_max;
   }
 }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
-static inline void xnn_init_qs8_conv_minmax_neon_params(
+static inline void xnn_init_qs8_conv_minmax_gemmlowp_neon_params(
   union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
   float scale,
   int8_t output_zero_point,
@@ -340,16 +359,16 @@
   assert(shift >= 0);
   assert(shift < 32);
 
-  params->neon.multiplier = multiplier;
-  params->neon.right_shift = -shift;
-  params->neon.output_zero_point = (int16_t) output_zero_point;
-  params->neon.output_min = output_min;
-  params->neon.output_max = output_max;
+  params->gemmlowp_neon.multiplier = multiplier;
+  params->gemmlowp_neon.right_shift = -shift;
+  params->gemmlowp_neon.output_zero_point = (int16_t) output_zero_point;
+  params->gemmlowp_neon.output_min = output_min;
+  params->gemmlowp_neon.output_max = output_max;
 }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
 #if XNN_ARCH_WASMSIMD
-static inline void xnn_init_qs8_conv_minmax_wasmsimd_params(
+static inline void xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params(
   union xnn_qs8_conv_minmax_params params[XNN_MIN_ELEMENTS(1)],
   float scale,
   int8_t output_zero_point,
@@ -372,25 +391,25 @@
   const int64_t twice_multiplier = INT64_C(2) * (int64_t) multiplier;
   const uint32_t remainder_mask = (UINT32_C(1) << shift) - UINT32_C(1);
   const uint32_t remainder_threshold = remainder_mask >> 1;
-  params->wasmsimd.multiplier[0] = twice_multiplier;
-  params->wasmsimd.multiplier[1] = twice_multiplier;
-  params->wasmsimd.rounding[0] = INT64_C(0x80000000);
-  params->wasmsimd.rounding[1] = INT64_C(0x80000000);
-  params->wasmsimd.remainder_mask[0] = (int32_t) remainder_mask;
-  params->wasmsimd.remainder_mask[1] = (int32_t) remainder_mask;
-  params->wasmsimd.remainder_mask[2] = (int32_t) remainder_mask;
-  params->wasmsimd.remainder_mask[3] = (int32_t) remainder_mask;
-  params->wasmsimd.remainder_threshold[0] = (int32_t) remainder_threshold;
-  params->wasmsimd.remainder_threshold[1] = (int32_t) remainder_threshold;
-  params->wasmsimd.remainder_threshold[2] = (int32_t) remainder_threshold;
-  params->wasmsimd.remainder_threshold[3] = (int32_t) remainder_threshold;
-  params->wasmsimd.shift = shift;
+  params->gemmlowp_wasmsimd.multiplier[0] = twice_multiplier;
+  params->gemmlowp_wasmsimd.multiplier[1] = twice_multiplier;
+  params->gemmlowp_wasmsimd.rounding[0] = INT64_C(0x80000000);
+  params->gemmlowp_wasmsimd.rounding[1] = INT64_C(0x80000000);
+  params->gemmlowp_wasmsimd.remainder_mask[0] = (int32_t) remainder_mask;
+  params->gemmlowp_wasmsimd.remainder_mask[1] = (int32_t) remainder_mask;
+  params->gemmlowp_wasmsimd.remainder_mask[2] = (int32_t) remainder_mask;
+  params->gemmlowp_wasmsimd.remainder_mask[3] = (int32_t) remainder_mask;
+  params->gemmlowp_wasmsimd.remainder_threshold[0] = (int32_t) remainder_threshold;
+  params->gemmlowp_wasmsimd.remainder_threshold[1] = (int32_t) remainder_threshold;
+  params->gemmlowp_wasmsimd.remainder_threshold[2] = (int32_t) remainder_threshold;
+  params->gemmlowp_wasmsimd.remainder_threshold[3] = (int32_t) remainder_threshold;
+  params->gemmlowp_wasmsimd.shift = shift;
   for (uint32_t i = 0; i < 8; i++) {
-    params->wasmsimd.output_zero_point[i] = (int16_t) output_zero_point;
+    params->gemmlowp_wasmsimd.output_zero_point[i] = (int16_t) output_zero_point;
   }
   for (uint32_t i = 0; i < 16; i++) {
-    params->wasmsimd.output_min[i] = output_min;
-    params->wasmsimd.output_max[i] = output_max;
+    params->gemmlowp_wasmsimd.output_min[i] = output_min;
+    params->gemmlowp_wasmsimd.output_max[i] = output_max;
   }
 }
 #endif  // XNN_ARCH_WASMSIMD
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index 54955a4..4601652 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -291,7 +291,7 @@
     int32_t output_min_less_zero_point;
     int32_t output_max_less_zero_point;
     int32_t output_zero_point;
-  } scalar;
+  } gemmlowp_scalar;
 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
   struct {
     int32_t multiplier;
@@ -299,7 +299,7 @@
     int16_t output_zero_point;
     int8_t output_min;
     int8_t output_max;
-  } neon;
+  } gemmlowp_neon;
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
   struct {
@@ -311,7 +311,7 @@
     XNN_ALIGN(16) int16_t output_zero_point[8];
     XNN_ALIGN(16) int16_t output_min[8];
     XNN_ALIGN(16) int16_t output_max[8];
-  } sse2;
+  } gemmlowp_sse2;
   struct {
     XNN_ALIGN(16) uint32_t multiplier[4];
     XNN_ALIGN(16) uint64_t rounding[2];
@@ -321,7 +321,7 @@
     XNN_ALIGN(16) int16_t output_zero_point[8];
     XNN_ALIGN(16) int8_t output_min[16];
     XNN_ALIGN(16) int8_t output_max[16];
-  } sse4;
+  } gemmlowp_sse4;
   struct {
     XNN_ALIGN(32) uint32_t multiplier[8];
     XNN_ALIGN(32) uint64_t rounding[4];
@@ -331,7 +331,25 @@
     XNN_ALIGN(32) int16_t output_zero_point[16];
     XNN_ALIGN(32) int8_t output_min[32];
     XNN_ALIGN(32) int8_t output_max[32];
-  } avx2;
+  } gemmlowp_avx2;
+  struct {
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int16_t output_min[8];
+    XNN_ALIGN(16) int16_t output_max[8];
+  } fp32_sse2;
+  struct {
+    XNN_ALIGN(16) float scale[4];
+    XNN_ALIGN(16) int16_t output_zero_point[8];
+    XNN_ALIGN(16) int8_t output_min[16];
+    XNN_ALIGN(16) int8_t output_max[16];
+  } fp32_sse4;
+  struct {
+    XNN_ALIGN(32) float scale[8];
+    XNN_ALIGN(32) int16_t output_zero_point[16];
+    XNN_ALIGN(32) int8_t output_min[32];
+    XNN_ALIGN(32) int8_t output_max[32];
+  } fp32_avx2;
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 #if XNN_ARCH_WASMSIMD
   struct {
@@ -343,7 +361,7 @@
     XNN_ALIGN(16) int16_t output_zero_point[8];
     XNN_ALIGN(16) int8_t output_min[16];
     XNN_ALIGN(16) int8_t output_max[16];
-  } wasmsimd;
+  } gemmlowp_wasmsimd;
 #endif  // XNN_ARCH_WASMSIMD
 };
 
diff --git a/test/qs8-dwconv-minmax-fp32.cc b/test/qs8-dwconv-minmax-fp32.cc
new file mode 100644
index 0000000..2a6d601
--- /dev/null
+++ b/test/qs8-dwconv-minmax-fp32.cc
@@ -0,0 +1,2252 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+// All rights reserved.
+//
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+//
+// Auto-generated file. Do not edit!
+//   Specification: test/qs8-dwconv-minmax-fp32.yaml
+//   Generator: tools/generate-dwconv-test.py
+
+
+#include <gtest/gtest.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
+#include <xnnpack/dwconv.h>
+#include "dwconv-microkernel-tester.h"
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_eq_32) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(9)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_div_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_lt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_gt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(9)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(9)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X9__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(9)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(9)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(9)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X9__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(9)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(9)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(9)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X9__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(9)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_eq_32) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(9)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_lt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 9; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(9)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X9__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 9; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(9)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_eq_16) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_div_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_lt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_gt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_eq_32) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(25)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_div_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_lt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_gt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL16, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(8)
+      .kr(25)
+      .channels(8)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_div_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 8; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, c_gt_8_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 9; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(8)
+        .width(5)
+        .output_stride(43)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 40; channels += 7) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 16; channels < 128; channels += 24) {
+      DWConvMicrokernelTester()
+        .cr(8)
+        .kr(25)
+        .channels(channels)
+        .input_offset(176)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP8X25__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 16; channels < 128; channels += 24) {
+        DWConvMicrokernelTester()
+          .cr(8)
+          .kr(25)
+          .channels(channels)
+          .input_offset(176)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_eq_16) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(16)
+      .kr(25)
+      .channels(16)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_div_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_lt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 16; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, c_gt_16_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 17; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(16)
+        .width(5)
+        .output_stride(83)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 80; channels += 15) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 32; channels < 256; channels += 48) {
+      DWConvMicrokernelTester()
+        .cr(16)
+        .kr(25)
+        .channels(channels)
+        .input_offset(304)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP16X25__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 32; channels < 256; channels += 48) {
+        DWConvMicrokernelTester()
+          .cr(16)
+          .kr(25)
+          .channels(channels)
+          .input_offset(304)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_eq_24) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(24)
+      .kr(25)
+      .channels(24)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_div_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_lt_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 24; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, c_gt_24_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 25; channels < 48; channels++) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(24)
+        .width(5)
+        .output_stride(127)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 120; channels += 23) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 48; channels < 384; channels += 72) {
+      DWConvMicrokernelTester()
+        .cr(24)
+        .kr(25)
+        .channels(channels)
+        .input_offset(464)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP24X25__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 48; channels < 384; channels += 72) {
+        DWConvMicrokernelTester()
+          .cr(24)
+          .kr(25)
+          .channels(channels)
+          .input_offset(464)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_eq_32) {
+    TEST_REQUIRES_X86_AVX2;
+    DWConvMicrokernelTester()
+      .cr(32)
+      .kr(25)
+      .channels(32)
+      .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_div_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_lt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 1; channels < 32; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, c_gt_32_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 33; channels < 64; channels++) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_step) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      for (size_t step = 2; step <= 25; step++) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .width(3)
+          .step(step)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_output_stride) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(32)
+        .width(5)
+        .output_stride(163)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmin(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, multipixel_with_qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t channels = 1; channels <= 160; channels += 31) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .width(3)
+        .qmax(128)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, input_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t channels = 64; channels < 512; channels += 96) {
+      DWConvMicrokernelTester()
+        .cr(32)
+        .kr(25)
+        .channels(channels)
+        .input_offset(592)
+        .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_DWCONV_MINMAX_FP32_UP32X25__AVX2_MUL32, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 25; mz++) {
+      for (uint32_t channels = 64; channels < 512; channels += 96) {
+        DWConvMicrokernelTester()
+          .cr(32)
+          .kr(25)
+          .channels(channels)
+          .input_offset(592)
+          .zero_index(mz)
+          .Test(xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
diff --git a/test/qs8-dwconv-minmax-fp32.yaml b/test/qs8-dwconv-minmax-fp32.yaml
new file mode 100644
index 0000000..5ee2e24
--- /dev/null
+++ b/test/qs8-dwconv-minmax-fp32.yaml
@@ -0,0 +1,28 @@
+# Copyright 2021 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up24x9__avx2_mul32
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul16
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up8x25__avx2_mul32
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up16x25__avx2_mul32
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up24x25__avx2_mul32
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+- name: xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
diff --git a/test/qs8-dwconv-minmax-gemmlowp.cc b/test/qs8-dwconv-minmax-gemmlowp.cc
index ef5ebcf..1693f6a 100644
--- a/test/qs8-dwconv-minmax-gemmlowp.cc
+++ b/test/qs8-dwconv-minmax-gemmlowp.cc
@@ -27,7 +27,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__NEON_MUL16, c_div_8) {
@@ -37,7 +37,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49,7 +49,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61,7 +61,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72,7 +72,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83,7 +83,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -95,7 +95,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -107,7 +107,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -119,7 +119,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -133,7 +133,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -147,7 +147,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -160,7 +160,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -173,7 +173,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -185,7 +185,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -199,7 +199,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -213,7 +213,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__NEON_MUL16, c_div_16) {
@@ -223,7 +223,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -235,7 +235,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -247,7 +247,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -258,7 +258,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -269,7 +269,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -281,7 +281,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -293,7 +293,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -305,7 +305,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -319,7 +319,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -333,7 +333,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -346,7 +346,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -359,7 +359,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -371,7 +371,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -385,7 +385,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -399,7 +399,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__NEON_MUL16, c_div_24) {
@@ -409,7 +409,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -421,7 +421,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -433,7 +433,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -444,7 +444,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -455,7 +455,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -467,7 +467,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -479,7 +479,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -491,7 +491,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -505,7 +505,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -519,7 +519,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -532,7 +532,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -545,7 +545,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -557,7 +557,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -571,7 +571,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -585,7 +585,7 @@
       .cr(32)
       .kr(9)
       .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__NEON_MUL16, c_div_32) {
@@ -595,7 +595,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -607,7 +607,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -619,7 +619,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -630,7 +630,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -641,7 +641,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -653,7 +653,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -665,7 +665,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -677,7 +677,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -691,7 +691,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -705,7 +705,7 @@
         .channels(32)
         .width(5)
         .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -718,7 +718,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -731,7 +731,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -743,7 +743,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -757,7 +757,7 @@
           .channels(channels)
           .input_offset(592)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -771,7 +771,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE2_MUL16, c_div_8) {
@@ -781,7 +781,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -793,7 +793,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -805,7 +805,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -816,7 +816,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -827,7 +827,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -839,7 +839,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -851,7 +851,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -863,7 +863,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -877,7 +877,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -891,7 +891,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -904,7 +904,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -917,7 +917,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -929,7 +929,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -943,7 +943,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -957,7 +957,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE2_MUL16, c_div_16) {
@@ -967,7 +967,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -979,7 +979,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -991,7 +991,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1002,7 +1002,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1013,7 +1013,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1025,7 +1025,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1037,7 +1037,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1049,7 +1049,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1063,7 +1063,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1077,7 +1077,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1090,7 +1090,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1103,7 +1103,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1115,7 +1115,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1129,7 +1129,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1143,7 +1143,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE2_MUL16, c_div_24) {
@@ -1153,7 +1153,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1165,7 +1165,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1177,7 +1177,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1188,7 +1188,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1199,7 +1199,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1211,7 +1211,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1223,7 +1223,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1235,7 +1235,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1249,7 +1249,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1263,7 +1263,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1276,7 +1276,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1289,7 +1289,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1301,7 +1301,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1315,7 +1315,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1329,7 +1329,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSSE3_MUL16, c_div_8) {
@@ -1339,7 +1339,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1351,7 +1351,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1363,7 +1363,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1374,7 +1374,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1385,7 +1385,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1397,7 +1397,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1409,7 +1409,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1421,7 +1421,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1435,7 +1435,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1449,7 +1449,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1462,7 +1462,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1475,7 +1475,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1487,7 +1487,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1501,7 +1501,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1515,7 +1515,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSSE3_MUL16, c_div_16) {
@@ -1525,7 +1525,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1537,7 +1537,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1549,7 +1549,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1560,7 +1560,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1571,7 +1571,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1583,7 +1583,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1595,7 +1595,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1607,7 +1607,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1621,7 +1621,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1635,7 +1635,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1648,7 +1648,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1661,7 +1661,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1673,7 +1673,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1687,7 +1687,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1701,7 +1701,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSSE3_MUL16, c_div_24) {
@@ -1711,7 +1711,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1723,7 +1723,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1735,7 +1735,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1746,7 +1746,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1757,7 +1757,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1769,7 +1769,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1781,7 +1781,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1793,7 +1793,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1807,7 +1807,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1821,7 +1821,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1834,7 +1834,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1847,7 +1847,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1859,7 +1859,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1873,7 +1873,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1887,7 +1887,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL16, c_div_8) {
@@ -1897,7 +1897,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1909,7 +1909,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1921,7 +1921,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1932,7 +1932,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1943,7 +1943,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1955,7 +1955,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1967,7 +1967,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1979,7 +1979,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1993,7 +1993,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2007,7 +2007,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2020,7 +2020,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2033,7 +2033,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2045,7 +2045,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2059,7 +2059,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2073,7 +2073,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL16, c_div_16) {
@@ -2083,7 +2083,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2095,7 +2095,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2107,7 +2107,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2118,7 +2118,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2129,7 +2129,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2141,7 +2141,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2153,7 +2153,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2165,7 +2165,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2179,7 +2179,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2193,7 +2193,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2206,7 +2206,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2219,7 +2219,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2231,7 +2231,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2245,7 +2245,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2259,7 +2259,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL16, c_div_24) {
@@ -2269,7 +2269,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2281,7 +2281,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2293,7 +2293,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2304,7 +2304,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2315,7 +2315,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2327,7 +2327,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2339,7 +2339,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2351,7 +2351,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2365,7 +2365,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2379,7 +2379,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2392,7 +2392,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2405,7 +2405,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2417,7 +2417,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2431,7 +2431,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2445,7 +2445,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL16, c_div_8) {
@@ -2455,7 +2455,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2467,7 +2467,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2479,7 +2479,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2490,7 +2490,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2501,7 +2501,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2513,7 +2513,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2525,7 +2525,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2537,7 +2537,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2551,7 +2551,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2565,7 +2565,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2578,7 +2578,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2591,7 +2591,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2603,7 +2603,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2617,7 +2617,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2631,7 +2631,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL16, c_div_16) {
@@ -2641,7 +2641,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2653,7 +2653,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2665,7 +2665,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2676,7 +2676,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2687,7 +2687,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2699,7 +2699,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2711,7 +2711,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2723,7 +2723,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2737,7 +2737,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2751,7 +2751,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2764,7 +2764,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2777,7 +2777,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2789,7 +2789,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2803,7 +2803,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2817,7 +2817,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL16, c_div_24) {
@@ -2827,7 +2827,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2839,7 +2839,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2851,7 +2851,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2862,7 +2862,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2873,7 +2873,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2885,7 +2885,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2897,7 +2897,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2909,7 +2909,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2923,7 +2923,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2937,7 +2937,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2950,7 +2950,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2963,7 +2963,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2975,7 +2975,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2989,7 +2989,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3003,7 +3003,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL16, c_div_16) {
@@ -3013,7 +3013,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3025,7 +3025,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3037,7 +3037,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3048,7 +3048,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3059,7 +3059,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3071,7 +3071,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3083,7 +3083,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3095,7 +3095,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3109,7 +3109,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3123,7 +3123,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3136,7 +3136,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3149,7 +3149,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3161,7 +3161,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3175,7 +3175,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3189,7 +3189,7 @@
       .cr(32)
       .kr(9)
       .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL16, c_div_32) {
@@ -3199,7 +3199,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3211,7 +3211,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3223,7 +3223,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3234,7 +3234,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3245,7 +3245,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3257,7 +3257,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3269,7 +3269,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3281,7 +3281,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3295,7 +3295,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3309,7 +3309,7 @@
         .channels(32)
         .width(5)
         .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3322,7 +3322,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3335,7 +3335,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3347,7 +3347,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3361,7 +3361,7 @@
           .channels(channels)
           .input_offset(592)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3375,7 +3375,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__SSE41_MUL32, c_div_8) {
@@ -3385,7 +3385,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3397,7 +3397,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3409,7 +3409,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3420,7 +3420,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3431,7 +3431,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3443,7 +3443,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3455,7 +3455,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3467,7 +3467,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3481,7 +3481,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3495,7 +3495,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3508,7 +3508,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3521,7 +3521,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3533,7 +3533,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3547,7 +3547,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3561,7 +3561,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__SSE41_MUL32, c_div_16) {
@@ -3571,7 +3571,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3583,7 +3583,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3595,7 +3595,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3606,7 +3606,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3617,7 +3617,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3629,7 +3629,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3641,7 +3641,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3653,7 +3653,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3667,7 +3667,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3681,7 +3681,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3694,7 +3694,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3707,7 +3707,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3719,7 +3719,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3733,7 +3733,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3747,7 +3747,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__SSE41_MUL32, c_div_24) {
@@ -3757,7 +3757,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3769,7 +3769,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3781,7 +3781,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3792,7 +3792,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3803,7 +3803,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3815,7 +3815,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3827,7 +3827,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3839,7 +3839,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3853,7 +3853,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3867,7 +3867,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3880,7 +3880,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3893,7 +3893,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3905,7 +3905,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3919,7 +3919,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3933,7 +3933,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX_MUL32, c_div_8) {
@@ -3943,7 +3943,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3955,7 +3955,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3967,7 +3967,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3978,7 +3978,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3989,7 +3989,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4001,7 +4001,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4013,7 +4013,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4025,7 +4025,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4039,7 +4039,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4053,7 +4053,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4066,7 +4066,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4079,7 +4079,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4091,7 +4091,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4105,7 +4105,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4119,7 +4119,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX_MUL32, c_div_16) {
@@ -4129,7 +4129,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4141,7 +4141,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4153,7 +4153,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4164,7 +4164,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4175,7 +4175,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4187,7 +4187,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4199,7 +4199,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4211,7 +4211,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4225,7 +4225,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4239,7 +4239,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4252,7 +4252,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4265,7 +4265,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4277,7 +4277,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4291,7 +4291,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4305,7 +4305,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX_MUL32, c_div_24) {
@@ -4315,7 +4315,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4327,7 +4327,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4339,7 +4339,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4350,7 +4350,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4361,7 +4361,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4373,7 +4373,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4385,7 +4385,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4397,7 +4397,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4411,7 +4411,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4425,7 +4425,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4438,7 +4438,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4451,7 +4451,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4463,7 +4463,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4477,7 +4477,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4491,7 +4491,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__XOP_MUL32, c_div_8) {
@@ -4501,7 +4501,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4513,7 +4513,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4525,7 +4525,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4536,7 +4536,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4547,7 +4547,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4559,7 +4559,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4571,7 +4571,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4583,7 +4583,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4597,7 +4597,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4611,7 +4611,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4624,7 +4624,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4637,7 +4637,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4649,7 +4649,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4663,7 +4663,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4677,7 +4677,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__XOP_MUL32, c_div_16) {
@@ -4687,7 +4687,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4699,7 +4699,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4711,7 +4711,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4722,7 +4722,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4733,7 +4733,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4745,7 +4745,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4757,7 +4757,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4769,7 +4769,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4783,7 +4783,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4797,7 +4797,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4810,7 +4810,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4823,7 +4823,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4835,7 +4835,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4849,7 +4849,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4863,7 +4863,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__XOP_MUL32, c_div_24) {
@@ -4873,7 +4873,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4885,7 +4885,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4897,7 +4897,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4908,7 +4908,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4919,7 +4919,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4931,7 +4931,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4943,7 +4943,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4955,7 +4955,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4969,7 +4969,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4983,7 +4983,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4996,7 +4996,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5009,7 +5009,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5021,7 +5021,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5035,7 +5035,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5049,7 +5049,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__AVX2_MUL32, c_div_8) {
@@ -5059,7 +5059,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5071,7 +5071,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5083,7 +5083,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5094,7 +5094,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5105,7 +5105,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5117,7 +5117,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5129,7 +5129,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5141,7 +5141,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5155,7 +5155,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5169,7 +5169,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5182,7 +5182,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5195,7 +5195,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5207,7 +5207,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5221,7 +5221,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5235,7 +5235,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX2_MUL32, c_div_16) {
@@ -5245,7 +5245,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5257,7 +5257,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5269,7 +5269,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5280,7 +5280,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5291,7 +5291,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5303,7 +5303,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5315,7 +5315,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5327,7 +5327,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5341,7 +5341,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5355,7 +5355,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5368,7 +5368,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5381,7 +5381,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5393,7 +5393,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5407,7 +5407,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5421,7 +5421,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__AVX2_MUL32, c_div_24) {
@@ -5431,7 +5431,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5443,7 +5443,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5455,7 +5455,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5466,7 +5466,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5477,7 +5477,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5489,7 +5489,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5501,7 +5501,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5513,7 +5513,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5527,7 +5527,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5541,7 +5541,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5554,7 +5554,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5567,7 +5567,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5579,7 +5579,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5593,7 +5593,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5607,7 +5607,7 @@
       .cr(32)
       .kr(9)
       .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX2_MUL32, c_div_32) {
@@ -5617,7 +5617,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5629,7 +5629,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5641,7 +5641,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5652,7 +5652,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5663,7 +5663,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5675,7 +5675,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5687,7 +5687,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5699,7 +5699,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5713,7 +5713,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5727,7 +5727,7 @@
         .channels(32)
         .width(5)
         .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5740,7 +5740,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5753,7 +5753,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5765,7 +5765,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5779,7 +5779,7 @@
           .channels(channels)
           .input_offset(592)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5793,7 +5793,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__AVX512SKX_MUL32, c_div_16) {
@@ -5803,7 +5803,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5815,7 +5815,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5827,7 +5827,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5838,7 +5838,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5849,7 +5849,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5861,7 +5861,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5873,7 +5873,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5885,7 +5885,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5899,7 +5899,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5913,7 +5913,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5926,7 +5926,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5939,7 +5939,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5951,7 +5951,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5965,7 +5965,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5979,7 +5979,7 @@
       .cr(32)
       .kr(9)
       .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X9__AVX512SKX_MUL32, c_div_32) {
@@ -5989,7 +5989,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6001,7 +6001,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6013,7 +6013,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6024,7 +6024,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6035,7 +6035,7 @@
         .cr(32)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6047,7 +6047,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6059,7 +6059,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6071,7 +6071,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6085,7 +6085,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6099,7 +6099,7 @@
         .channels(32)
         .width(5)
         .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6112,7 +6112,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6125,7 +6125,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6137,7 +6137,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6151,7 +6151,7 @@
           .channels(channels)
           .input_offset(592)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6164,7 +6164,7 @@
       .cr(8)
       .kr(9)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X9__WASMSIMD_MUL16, c_div_8) {
@@ -6173,7 +6173,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6184,7 +6184,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6195,7 +6195,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6205,7 +6205,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6215,7 +6215,7 @@
         .cr(8)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6226,7 +6226,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6237,7 +6237,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6248,7 +6248,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6261,7 +6261,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6274,7 +6274,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6286,7 +6286,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6298,7 +6298,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6309,7 +6309,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6322,7 +6322,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6335,7 +6335,7 @@
       .cr(16)
       .kr(9)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X9__WASMSIMD_MUL16, c_div_16) {
@@ -6344,7 +6344,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6355,7 +6355,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6366,7 +6366,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6376,7 +6376,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6386,7 +6386,7 @@
         .cr(16)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6397,7 +6397,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6408,7 +6408,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6419,7 +6419,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6432,7 +6432,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6445,7 +6445,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6457,7 +6457,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6469,7 +6469,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6480,7 +6480,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6493,7 +6493,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6506,7 +6506,7 @@
       .cr(24)
       .kr(9)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X9__WASMSIMD_MUL16, c_div_24) {
@@ -6515,7 +6515,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6526,7 +6526,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6537,7 +6537,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6547,7 +6547,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6557,7 +6557,7 @@
         .cr(24)
         .kr(9)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6568,7 +6568,7 @@
         .kr(9)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6579,7 +6579,7 @@
         .kr(9)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6590,7 +6590,7 @@
         .kr(9)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6603,7 +6603,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6616,7 +6616,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6628,7 +6628,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6640,7 +6640,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6651,7 +6651,7 @@
         .kr(9)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6664,7 +6664,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6676,7 +6676,7 @@
     .cr(1)
     .kr(9)
     .channels(1)
-    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X9__SCALAR, c_gt_1) {
@@ -6685,7 +6685,7 @@
       .cr(1)
       .kr(9)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6696,7 +6696,7 @@
       .kr(9)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6707,7 +6707,7 @@
       .kr(9)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6718,7 +6718,7 @@
       .kr(9)
       .channels(channels)
       .width(3)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6731,7 +6731,7 @@
         .channels(channels)
         .width(3)
         .step(step)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -6744,7 +6744,7 @@
       .channels(1)
       .width(5)
       .output_stride(7)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6756,7 +6756,7 @@
       .channels(channels)
       .width(3)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6768,7 +6768,7 @@
       .channels(channels)
       .width(3)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6779,7 +6779,7 @@
       .kr(9)
       .channels(channels)
       .input_offset(48)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6792,7 +6792,7 @@
         .channels(channels)
         .input_offset(48)
         .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -6802,7 +6802,7 @@
     .cr(2)
     .kr(9)
     .channels(2)
-    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X9__SCALAR, c_div_2) {
@@ -6811,7 +6811,7 @@
       .cr(2)
       .kr(9)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6822,7 +6822,7 @@
       .kr(9)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6833,7 +6833,7 @@
       .kr(9)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6843,7 +6843,7 @@
       .cr(2)
       .kr(9)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6853,7 +6853,7 @@
       .cr(2)
       .kr(9)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6864,7 +6864,7 @@
       .kr(9)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6875,7 +6875,7 @@
       .kr(9)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6886,7 +6886,7 @@
       .kr(9)
       .channels(channels)
       .width(3)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6899,7 +6899,7 @@
         .channels(channels)
         .width(3)
         .step(step)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -6912,7 +6912,7 @@
       .channels(2)
       .width(5)
       .output_stride(13)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6924,7 +6924,7 @@
       .channels(channels)
       .width(3)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6936,7 +6936,7 @@
       .channels(channels)
       .width(3)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6947,7 +6947,7 @@
       .kr(9)
       .channels(channels)
       .input_offset(80)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6960,7 +6960,7 @@
         .channels(channels)
         .input_offset(80)
         .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -6970,7 +6970,7 @@
     .cr(4)
     .kr(9)
     .channels(4)
-    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X9__SCALAR, c_div_4) {
@@ -6979,7 +6979,7 @@
       .cr(4)
       .kr(9)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -6990,7 +6990,7 @@
       .kr(9)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7001,7 +7001,7 @@
       .kr(9)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7011,7 +7011,7 @@
       .cr(4)
       .kr(9)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7021,7 +7021,7 @@
       .cr(4)
       .kr(9)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7032,7 +7032,7 @@
       .kr(9)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7043,7 +7043,7 @@
       .kr(9)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7054,7 +7054,7 @@
       .kr(9)
       .channels(channels)
       .width(3)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7067,7 +7067,7 @@
         .channels(channels)
         .width(3)
         .step(step)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -7080,7 +7080,7 @@
       .channels(4)
       .width(5)
       .output_stride(23)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7092,7 +7092,7 @@
       .channels(channels)
       .width(3)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7104,7 +7104,7 @@
       .channels(channels)
       .width(3)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7115,7 +7115,7 @@
       .kr(9)
       .channels(channels)
       .input_offset(112)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -7128,7 +7128,7 @@
         .channels(channels)
         .input_offset(112)
         .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -7140,7 +7140,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__NEON_MUL16, c_div_8) {
@@ -7150,7 +7150,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7162,7 +7162,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7174,7 +7174,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7185,7 +7185,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7196,7 +7196,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7208,7 +7208,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7220,7 +7220,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7232,7 +7232,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7246,7 +7246,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7260,7 +7260,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7273,7 +7273,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7286,7 +7286,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7298,7 +7298,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7312,7 +7312,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7326,7 +7326,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__NEON_MUL16, c_div_16) {
@@ -7336,7 +7336,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7348,7 +7348,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7360,7 +7360,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7371,7 +7371,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7382,7 +7382,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7394,7 +7394,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7406,7 +7406,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7418,7 +7418,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7432,7 +7432,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7446,7 +7446,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7459,7 +7459,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7472,7 +7472,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7484,7 +7484,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7498,7 +7498,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7512,7 +7512,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__NEON_MUL16, c_div_24) {
@@ -7522,7 +7522,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7534,7 +7534,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7546,7 +7546,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7557,7 +7557,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7568,7 +7568,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7580,7 +7580,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7592,7 +7592,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7604,7 +7604,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7618,7 +7618,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7632,7 +7632,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7645,7 +7645,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7658,7 +7658,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7670,7 +7670,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7684,7 +7684,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7698,7 +7698,7 @@
       .cr(32)
       .kr(25)
       .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__NEON_MUL16, c_div_32) {
@@ -7708,7 +7708,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7720,7 +7720,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7732,7 +7732,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7743,7 +7743,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7754,7 +7754,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7766,7 +7766,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7778,7 +7778,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7790,7 +7790,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7804,7 +7804,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7818,7 +7818,7 @@
         .channels(32)
         .width(5)
         .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7831,7 +7831,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7844,7 +7844,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7856,7 +7856,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7870,7 +7870,7 @@
           .channels(channels)
           .input_offset(592)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7884,7 +7884,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE2_MUL16, c_div_8) {
@@ -7894,7 +7894,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7906,7 +7906,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7918,7 +7918,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7929,7 +7929,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7940,7 +7940,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7952,7 +7952,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7964,7 +7964,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7976,7 +7976,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7990,7 +7990,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8004,7 +8004,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8017,7 +8017,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8030,7 +8030,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8042,7 +8042,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8056,7 +8056,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8070,7 +8070,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE2_MUL16, c_div_16) {
@@ -8080,7 +8080,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8092,7 +8092,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8104,7 +8104,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8115,7 +8115,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8126,7 +8126,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8138,7 +8138,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8150,7 +8150,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8162,7 +8162,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8176,7 +8176,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8190,7 +8190,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8203,7 +8203,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8216,7 +8216,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8228,7 +8228,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8242,7 +8242,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8256,7 +8256,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE2_MUL16, c_div_24) {
@@ -8266,7 +8266,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8278,7 +8278,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8290,7 +8290,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8301,7 +8301,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8312,7 +8312,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8324,7 +8324,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8336,7 +8336,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8348,7 +8348,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8362,7 +8362,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8376,7 +8376,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8389,7 +8389,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8402,7 +8402,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8414,7 +8414,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8428,7 +8428,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8442,7 +8442,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSSE3_MUL16, c_div_8) {
@@ -8452,7 +8452,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8464,7 +8464,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8476,7 +8476,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8487,7 +8487,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8498,7 +8498,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8510,7 +8510,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8522,7 +8522,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8534,7 +8534,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8548,7 +8548,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8562,7 +8562,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8575,7 +8575,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8588,7 +8588,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8600,7 +8600,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8614,7 +8614,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8628,7 +8628,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSSE3_MUL16, c_div_16) {
@@ -8638,7 +8638,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8650,7 +8650,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8662,7 +8662,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8673,7 +8673,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8684,7 +8684,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8696,7 +8696,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8708,7 +8708,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8720,7 +8720,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8734,7 +8734,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8748,7 +8748,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8761,7 +8761,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8774,7 +8774,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8786,7 +8786,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8800,7 +8800,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8814,7 +8814,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSSE3_MUL16, c_div_24) {
@@ -8824,7 +8824,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8836,7 +8836,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8848,7 +8848,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8859,7 +8859,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8870,7 +8870,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8882,7 +8882,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8894,7 +8894,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8906,7 +8906,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8920,7 +8920,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8934,7 +8934,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8947,7 +8947,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8960,7 +8960,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8972,7 +8972,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8986,7 +8986,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9000,7 +9000,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL16, c_div_8) {
@@ -9010,7 +9010,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9022,7 +9022,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9034,7 +9034,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9045,7 +9045,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9056,7 +9056,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9068,7 +9068,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9080,7 +9080,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9092,7 +9092,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9106,7 +9106,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9120,7 +9120,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9133,7 +9133,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9146,7 +9146,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9158,7 +9158,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9172,7 +9172,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9186,7 +9186,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL16, c_div_16) {
@@ -9196,7 +9196,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9208,7 +9208,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9220,7 +9220,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9231,7 +9231,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9242,7 +9242,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9254,7 +9254,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9266,7 +9266,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9278,7 +9278,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9292,7 +9292,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9306,7 +9306,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9319,7 +9319,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9332,7 +9332,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9344,7 +9344,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9358,7 +9358,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9372,7 +9372,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL16, c_div_24) {
@@ -9382,7 +9382,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9394,7 +9394,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9406,7 +9406,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9417,7 +9417,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9428,7 +9428,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9440,7 +9440,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9452,7 +9452,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9464,7 +9464,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9478,7 +9478,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9492,7 +9492,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9505,7 +9505,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9518,7 +9518,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9530,7 +9530,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9544,7 +9544,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9558,7 +9558,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL16, c_div_8) {
@@ -9568,7 +9568,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9580,7 +9580,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9592,7 +9592,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9603,7 +9603,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9614,7 +9614,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9626,7 +9626,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9638,7 +9638,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9650,7 +9650,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9664,7 +9664,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9678,7 +9678,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9691,7 +9691,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9704,7 +9704,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9716,7 +9716,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9730,7 +9730,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9744,7 +9744,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL16, c_div_16) {
@@ -9754,7 +9754,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9766,7 +9766,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9778,7 +9778,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9789,7 +9789,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9800,7 +9800,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9812,7 +9812,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9824,7 +9824,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9836,7 +9836,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9850,7 +9850,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9864,7 +9864,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9877,7 +9877,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9890,7 +9890,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9902,7 +9902,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9916,7 +9916,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9930,7 +9930,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL16, c_div_24) {
@@ -9940,7 +9940,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9952,7 +9952,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9964,7 +9964,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9975,7 +9975,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9986,7 +9986,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9998,7 +9998,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10010,7 +10010,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10022,7 +10022,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10036,7 +10036,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10050,7 +10050,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10063,7 +10063,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10076,7 +10076,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10088,7 +10088,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10102,7 +10102,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10116,7 +10116,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL16, c_div_16) {
@@ -10126,7 +10126,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10138,7 +10138,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10150,7 +10150,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10161,7 +10161,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10172,7 +10172,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10184,7 +10184,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10196,7 +10196,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10208,7 +10208,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10222,7 +10222,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10236,7 +10236,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10249,7 +10249,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10262,7 +10262,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10274,7 +10274,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10288,7 +10288,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10302,7 +10302,7 @@
       .cr(32)
       .kr(25)
       .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL16, c_div_32) {
@@ -10312,7 +10312,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10324,7 +10324,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10336,7 +10336,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10347,7 +10347,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10358,7 +10358,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10370,7 +10370,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10382,7 +10382,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10394,7 +10394,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10408,7 +10408,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10422,7 +10422,7 @@
         .channels(32)
         .width(5)
         .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10435,7 +10435,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10448,7 +10448,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10460,7 +10460,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10474,7 +10474,7 @@
           .channels(channels)
           .input_offset(592)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10488,7 +10488,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__SSE41_MUL32, c_div_8) {
@@ -10498,7 +10498,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10510,7 +10510,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10522,7 +10522,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10533,7 +10533,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10544,7 +10544,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10556,7 +10556,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10568,7 +10568,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10580,7 +10580,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10594,7 +10594,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10608,7 +10608,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10621,7 +10621,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10634,7 +10634,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10646,7 +10646,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10660,7 +10660,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10674,7 +10674,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__SSE41_MUL32, c_div_16) {
@@ -10684,7 +10684,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10696,7 +10696,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10708,7 +10708,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10719,7 +10719,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10730,7 +10730,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10742,7 +10742,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10754,7 +10754,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10766,7 +10766,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10780,7 +10780,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10794,7 +10794,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10807,7 +10807,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10820,7 +10820,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10832,7 +10832,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10846,7 +10846,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10860,7 +10860,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__SSE41_MUL32, c_div_24) {
@@ -10870,7 +10870,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10882,7 +10882,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10894,7 +10894,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10905,7 +10905,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10916,7 +10916,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10928,7 +10928,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10940,7 +10940,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10952,7 +10952,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10966,7 +10966,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10980,7 +10980,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10993,7 +10993,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11006,7 +11006,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11018,7 +11018,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11032,7 +11032,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11046,7 +11046,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX_MUL32, c_div_8) {
@@ -11056,7 +11056,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11068,7 +11068,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11080,7 +11080,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11091,7 +11091,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11102,7 +11102,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11114,7 +11114,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11126,7 +11126,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11138,7 +11138,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11152,7 +11152,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11166,7 +11166,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11179,7 +11179,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11192,7 +11192,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11204,7 +11204,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11218,7 +11218,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11232,7 +11232,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX_MUL32, c_div_16) {
@@ -11242,7 +11242,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11254,7 +11254,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11266,7 +11266,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11277,7 +11277,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11288,7 +11288,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11300,7 +11300,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11312,7 +11312,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11324,7 +11324,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11338,7 +11338,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11352,7 +11352,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11365,7 +11365,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11378,7 +11378,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11390,7 +11390,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11404,7 +11404,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11418,7 +11418,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX_MUL32, c_div_24) {
@@ -11428,7 +11428,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11440,7 +11440,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11452,7 +11452,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11463,7 +11463,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11474,7 +11474,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11486,7 +11486,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11498,7 +11498,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11510,7 +11510,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11524,7 +11524,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11538,7 +11538,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11551,7 +11551,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11564,7 +11564,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11576,7 +11576,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11590,7 +11590,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11604,7 +11604,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__XOP_MUL32, c_div_8) {
@@ -11614,7 +11614,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11626,7 +11626,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11638,7 +11638,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11649,7 +11649,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11660,7 +11660,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11672,7 +11672,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11684,7 +11684,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11696,7 +11696,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11710,7 +11710,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11724,7 +11724,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11737,7 +11737,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11750,7 +11750,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11762,7 +11762,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11776,7 +11776,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11790,7 +11790,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__XOP_MUL32, c_div_16) {
@@ -11800,7 +11800,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11812,7 +11812,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11824,7 +11824,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11835,7 +11835,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11846,7 +11846,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11858,7 +11858,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11870,7 +11870,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11882,7 +11882,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11896,7 +11896,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11910,7 +11910,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11923,7 +11923,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11936,7 +11936,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11948,7 +11948,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11962,7 +11962,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11976,7 +11976,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__XOP_MUL32, c_div_24) {
@@ -11986,7 +11986,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11998,7 +11998,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12010,7 +12010,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12021,7 +12021,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12032,7 +12032,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12044,7 +12044,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12056,7 +12056,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12068,7 +12068,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12082,7 +12082,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12096,7 +12096,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12109,7 +12109,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12122,7 +12122,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12134,7 +12134,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12148,7 +12148,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12162,7 +12162,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__AVX2_MUL32, c_div_8) {
@@ -12172,7 +12172,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12184,7 +12184,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12196,7 +12196,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12207,7 +12207,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12218,7 +12218,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12230,7 +12230,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12242,7 +12242,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12254,7 +12254,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12268,7 +12268,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12282,7 +12282,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12295,7 +12295,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12308,7 +12308,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12320,7 +12320,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12334,7 +12334,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12348,7 +12348,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX2_MUL32, c_div_16) {
@@ -12358,7 +12358,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12370,7 +12370,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12382,7 +12382,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12393,7 +12393,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12404,7 +12404,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12416,7 +12416,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12428,7 +12428,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12440,7 +12440,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12454,7 +12454,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12468,7 +12468,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12481,7 +12481,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12494,7 +12494,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12506,7 +12506,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12520,7 +12520,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12534,7 +12534,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__AVX2_MUL32, c_div_24) {
@@ -12544,7 +12544,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12556,7 +12556,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12568,7 +12568,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12579,7 +12579,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12590,7 +12590,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12602,7 +12602,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12614,7 +12614,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12626,7 +12626,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12640,7 +12640,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12654,7 +12654,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12667,7 +12667,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12680,7 +12680,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12692,7 +12692,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12706,7 +12706,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12720,7 +12720,7 @@
       .cr(32)
       .kr(25)
       .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX2_MUL32, c_div_32) {
@@ -12730,7 +12730,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12742,7 +12742,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12754,7 +12754,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12765,7 +12765,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12776,7 +12776,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12788,7 +12788,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12800,7 +12800,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12812,7 +12812,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12826,7 +12826,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12840,7 +12840,7 @@
         .channels(32)
         .width(5)
         .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12853,7 +12853,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12866,7 +12866,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12878,7 +12878,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12892,7 +12892,7 @@
           .channels(channels)
           .input_offset(592)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12906,7 +12906,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__AVX512SKX_MUL32, c_div_16) {
@@ -12916,7 +12916,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12928,7 +12928,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12940,7 +12940,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12951,7 +12951,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12962,7 +12962,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12974,7 +12974,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12986,7 +12986,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12998,7 +12998,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13012,7 +13012,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13026,7 +13026,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13039,7 +13039,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13052,7 +13052,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13064,7 +13064,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13078,7 +13078,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13092,7 +13092,7 @@
       .cr(32)
       .kr(25)
       .channels(32)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP32X25__AVX512SKX_MUL32, c_div_32) {
@@ -13102,7 +13102,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13114,7 +13114,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13126,7 +13126,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13137,7 +13137,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13148,7 +13148,7 @@
         .cr(32)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13160,7 +13160,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13172,7 +13172,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13184,7 +13184,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13198,7 +13198,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13212,7 +13212,7 @@
         .channels(32)
         .width(5)
         .output_stride(163)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13225,7 +13225,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13238,7 +13238,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13250,7 +13250,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(592)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13264,7 +13264,7 @@
           .channels(channels)
           .input_offset(592)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13277,7 +13277,7 @@
       .cr(8)
       .kr(25)
       .channels(8)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP8X25__WASMSIMD_MUL16, c_div_8) {
@@ -13286,7 +13286,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13297,7 +13297,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13308,7 +13308,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13318,7 +13318,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13328,7 +13328,7 @@
         .cr(8)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13339,7 +13339,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13350,7 +13350,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13361,7 +13361,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13374,7 +13374,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13387,7 +13387,7 @@
         .channels(8)
         .width(5)
         .output_stride(43)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13399,7 +13399,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13411,7 +13411,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13422,7 +13422,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(176)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13435,7 +13435,7 @@
           .channels(channels)
           .input_offset(176)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13448,7 +13448,7 @@
       .cr(16)
       .kr(25)
       .channels(16)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP16X25__WASMSIMD_MUL16, c_div_16) {
@@ -13457,7 +13457,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13468,7 +13468,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13479,7 +13479,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13489,7 +13489,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13499,7 +13499,7 @@
         .cr(16)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13510,7 +13510,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13521,7 +13521,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13532,7 +13532,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13545,7 +13545,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13558,7 +13558,7 @@
         .channels(16)
         .width(5)
         .output_stride(83)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13570,7 +13570,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13582,7 +13582,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13593,7 +13593,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(304)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13606,7 +13606,7 @@
           .channels(channels)
           .input_offset(304)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13619,7 +13619,7 @@
       .cr(24)
       .kr(25)
       .channels(24)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP24X25__WASMSIMD_MUL16, c_div_24) {
@@ -13628,7 +13628,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13639,7 +13639,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13650,7 +13650,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13660,7 +13660,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13670,7 +13670,7 @@
         .cr(24)
         .kr(25)
         .channels(channels)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13681,7 +13681,7 @@
         .kr(25)
         .channels(channels)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13692,7 +13692,7 @@
         .kr(25)
         .channels(channels)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13703,7 +13703,7 @@
         .kr(25)
         .channels(channels)
         .width(3)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13716,7 +13716,7 @@
           .channels(channels)
           .width(3)
           .step(step)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13729,7 +13729,7 @@
         .channels(24)
         .width(5)
         .output_stride(127)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13741,7 +13741,7 @@
         .channels(channels)
         .width(3)
         .qmin(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13753,7 +13753,7 @@
         .channels(channels)
         .width(3)
         .qmax(128)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13764,7 +13764,7 @@
         .kr(25)
         .channels(channels)
         .input_offset(464)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13777,7 +13777,7 @@
           .channels(channels)
           .input_offset(464)
           .zero_index(mz)
-          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13789,7 +13789,7 @@
     .cr(1)
     .kr(25)
     .channels(1)
-    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP1X25__SCALAR, c_gt_1) {
@@ -13798,7 +13798,7 @@
       .cr(1)
       .kr(25)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13809,7 +13809,7 @@
       .kr(25)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13820,7 +13820,7 @@
       .kr(25)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13831,7 +13831,7 @@
       .kr(25)
       .channels(channels)
       .width(3)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13844,7 +13844,7 @@
         .channels(channels)
         .width(3)
         .step(step)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -13857,7 +13857,7 @@
       .channels(1)
       .width(5)
       .output_stride(7)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13869,7 +13869,7 @@
       .channels(channels)
       .width(3)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13881,7 +13881,7 @@
       .channels(channels)
       .width(3)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13892,7 +13892,7 @@
       .kr(25)
       .channels(channels)
       .input_offset(48)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13905,7 +13905,7 @@
         .channels(channels)
         .input_offset(48)
         .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -13915,7 +13915,7 @@
     .cr(2)
     .kr(25)
     .channels(2)
-    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP2X25__SCALAR, c_div_2) {
@@ -13924,7 +13924,7 @@
       .cr(2)
       .kr(25)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13935,7 +13935,7 @@
       .kr(25)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13946,7 +13946,7 @@
       .kr(25)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13956,7 +13956,7 @@
       .cr(2)
       .kr(25)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13966,7 +13966,7 @@
       .cr(2)
       .kr(25)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13977,7 +13977,7 @@
       .kr(25)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13988,7 +13988,7 @@
       .kr(25)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -13999,7 +13999,7 @@
       .kr(25)
       .channels(channels)
       .width(3)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14012,7 +14012,7 @@
         .channels(channels)
         .width(3)
         .step(step)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -14025,7 +14025,7 @@
       .channels(2)
       .width(5)
       .output_stride(13)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14037,7 +14037,7 @@
       .channels(channels)
       .width(3)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14049,7 +14049,7 @@
       .channels(channels)
       .width(3)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14060,7 +14060,7 @@
       .kr(25)
       .channels(channels)
       .input_offset(80)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14073,7 +14073,7 @@
         .channels(channels)
         .input_offset(80)
         .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -14083,7 +14083,7 @@
     .cr(4)
     .kr(25)
     .channels(4)
-    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_DWCONV_MINMAX_GEMMLOWP_UP4X25__SCALAR, c_div_4) {
@@ -14092,7 +14092,7 @@
       .cr(4)
       .kr(25)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14103,7 +14103,7 @@
       .kr(25)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14114,7 +14114,7 @@
       .kr(25)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14124,7 +14124,7 @@
       .cr(4)
       .kr(25)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14134,7 +14134,7 @@
       .cr(4)
       .kr(25)
       .channels(channels)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14145,7 +14145,7 @@
       .kr(25)
       .channels(channels)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14156,7 +14156,7 @@
       .kr(25)
       .channels(channels)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14167,7 +14167,7 @@
       .kr(25)
       .channels(channels)
       .width(3)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14180,7 +14180,7 @@
         .channels(channels)
         .width(3)
         .step(step)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -14193,7 +14193,7 @@
       .channels(4)
       .width(5)
       .output_stride(23)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14205,7 +14205,7 @@
       .channels(channels)
       .width(3)
       .qmin(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14217,7 +14217,7 @@
       .channels(channels)
       .width(3)
       .qmax(128)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14228,7 +14228,7 @@
       .kr(25)
       .channels(channels)
       .input_offset(112)
-      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -14241,7 +14241,7 @@
         .channels(channels)
         .input_offset(112)
         .zero_index(mz)
-        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
\ No newline at end of file
diff --git a/test/qs8-dwconv-minmax-gemmlowp.yaml b/test/qs8-dwconv-minmax-gemmlowp.yaml
index 7ae6558..85a500e 100644
--- a/test/qs8-dwconv-minmax-gemmlowp.yaml
+++ b/test/qs8-dwconv-minmax-gemmlowp.yaml
@@ -3,158 +3,158 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__neon_mul16
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__neon_mul16
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__neon_mul16
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__neon_mul16
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse2_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse2_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse2_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__ssse3_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__ssse3_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__ssse3_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul16
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul16
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__sse41_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__sse41_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__sse41_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__xop_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__xop_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__xop_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__avx2_mul32
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx2_mul32
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__avx2_mul32
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx2_mul32
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__avx512skx_mul32
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x9__avx512skx_mul32
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x9__wasmsimd_mul16
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x9__wasmsimd_mul16
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x9__wasmsimd_mul16
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x9__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x9__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x9__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__neon_mul16
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__neon_mul16
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__neon_mul16
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__neon_mul16
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse2_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse2_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse2_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__ssse3_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__ssse3_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__ssse3_mul16
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul16
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul16
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul16
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__sse41_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__sse41_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__sse41_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__xop_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__xop_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__xop_mul32
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__avx2_mul32
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx2_mul32
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__avx2_mul32
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx2_mul32
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__avx512skx_mul32
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up32x25__avx512skx_mul32
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up8x25__wasmsimd_mul16
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up16x25__wasmsimd_mul16
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up24x25__wasmsimd_mul16
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up1x25__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up2x25__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
 - name: xnn_qs8_dwconv_minmax_gemmlowp_ukernel_up4x25__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
diff --git a/test/qs8-gemm-minmax-fp32.cc b/test/qs8-gemm-minmax-fp32.cc
new file mode 100644
index 0000000..a1f9cf0
--- /dev/null
+++ b/test/qs8-gemm-minmax-fp32.cc
@@ -0,0 +1,1390 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+// All rights reserved.
+//
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+//
+// Auto-generated file. Do not edit!
+//   Specification: test/qs8-gemm-minmax-fp32.yaml
+//   Generator: tools/generate-gemm-test.py
+
+
+#include <gtest/gtest.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
+#include <xnnpack/gemm.h>
+#include <xnnpack/igemm.h>
+#include <xnnpack/ppmm.h>
+#include "gemm-microkernel-tester.h"
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .cn_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 8; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(8)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 1; m++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(m)
+        .n(8)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 1; n <= 8; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .a_stride(11)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .a_stride(19)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .a_stride(83)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(8)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(n)
+          .k(k)
+          .a_stride(43)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 1; m++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(n)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(n)
+          .k(k)
+          .a_stride(43)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 1; m++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .cm_stride(11)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .qmin(128)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .qmax(128)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .cm_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .cn_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 8; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(8)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 2; m++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(m)
+        .n(8)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 1; n <= 8; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .a_stride(11)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .a_stride(19)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .a_stride(83)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(8)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(n)
+          .k(k)
+          .a_stride(43)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 2; m++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(n)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(n)
+          .k(k)
+          .a_stride(43)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 2; m++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .cm_stride(11)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .qmin(128)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .qmax(128)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .cm_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .cn_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .a_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 8; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(8)
+          .iterations(1)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 3; m++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(m)
+        .n(8)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 1; n <= 8; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .a_stride(11)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .a_stride(19)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .a_stride(83)
+        .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(8)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(n)
+          .k(k)
+          .a_stride(43)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 3; m++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(n)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_a) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(n)
+          .k(k)
+          .a_stride(43)
+          .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 3; m++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .cm_stride(11)
+            .iterations(1)
+            .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .qmin(128)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .qmax(128)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_GEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .cm_stride(11)
+      .Test(xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
diff --git a/test/qs8-gemm-minmax-fp32.yaml b/test/qs8-gemm-minmax-fp32.yaml
new file mode 100644
index 0000000..c18451e
--- /dev/null
+++ b/test/qs8-gemm-minmax-fp32.yaml
@@ -0,0 +1,14 @@
+# Copyright 2021 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+  k-block: 8
+- name: xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+  k-block: 8
diff --git a/test/qs8-gemm-minmax-gemmlowp.cc b/test/qs8-gemm-minmax-gemmlowp.cc
index 607e04b..afcc4f4 100644
--- a/test/qs8-gemm-minmax-gemmlowp.cc
+++ b/test/qs8-gemm-minmax-gemmlowp.cc
@@ -33,7 +33,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
@@ -47,7 +47,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_strided_a) {
@@ -61,7 +61,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
@@ -77,7 +77,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94,7 +94,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -110,7 +110,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -125,7 +125,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -141,7 +141,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -159,7 +159,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -176,7 +176,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -192,7 +192,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -210,7 +210,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -227,7 +227,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -243,7 +243,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -261,7 +261,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -279,7 +279,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -297,7 +297,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -315,7 +315,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -334,7 +334,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -352,7 +352,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -370,7 +370,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -388,7 +388,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -407,7 +407,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -428,7 +428,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -445,7 +445,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
@@ -459,7 +459,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
@@ -473,7 +473,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -489,7 +489,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
@@ -503,7 +503,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_strided_a) {
@@ -517,7 +517,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
@@ -533,7 +533,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -550,7 +550,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -566,7 +566,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -581,7 +581,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -597,7 +597,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -615,7 +615,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -632,7 +632,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -648,7 +648,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -666,7 +666,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -683,7 +683,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -699,7 +699,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -717,7 +717,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -735,7 +735,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -753,7 +753,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -771,7 +771,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -790,7 +790,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -808,7 +808,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -826,7 +826,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -844,7 +844,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -863,7 +863,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -884,7 +884,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -901,7 +901,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
@@ -915,7 +915,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
@@ -929,7 +929,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -945,7 +945,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
@@ -959,7 +959,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -973,7 +973,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -989,7 +989,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1006,7 +1006,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1022,7 +1022,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1037,7 +1037,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1053,7 +1053,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1071,7 +1071,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1088,7 +1088,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1104,7 +1104,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1122,7 +1122,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1139,7 +1139,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1155,7 +1155,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1173,7 +1173,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1191,7 +1191,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1209,7 +1209,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1227,7 +1227,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1246,7 +1246,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1264,7 +1264,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1282,7 +1282,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1300,7 +1300,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1319,7 +1319,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1340,7 +1340,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1357,7 +1357,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
@@ -1371,7 +1371,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
@@ -1385,7 +1385,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -1401,7 +1401,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
@@ -1415,7 +1415,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_strided_a) {
@@ -1429,7 +1429,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
@@ -1445,7 +1445,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1462,7 +1462,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1478,7 +1478,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1493,7 +1493,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1509,7 +1509,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1527,7 +1527,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1544,7 +1544,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1560,7 +1560,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1578,7 +1578,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1595,7 +1595,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1611,7 +1611,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1629,7 +1629,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1647,7 +1647,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1665,7 +1665,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1683,7 +1683,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1702,7 +1702,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1720,7 +1720,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1738,7 +1738,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1756,7 +1756,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1775,7 +1775,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1796,7 +1796,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1813,7 +1813,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
@@ -1827,7 +1827,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
@@ -1841,7 +1841,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -1857,7 +1857,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
@@ -1871,7 +1871,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_strided_a) {
@@ -1885,7 +1885,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
@@ -1901,7 +1901,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1918,7 +1918,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1934,7 +1934,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1949,7 +1949,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1965,7 +1965,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1983,7 +1983,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2000,7 +2000,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2016,7 +2016,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2034,7 +2034,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2051,7 +2051,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2067,7 +2067,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2085,7 +2085,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2103,7 +2103,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2121,7 +2121,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2139,7 +2139,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2158,7 +2158,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2176,7 +2176,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2194,7 +2194,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2212,7 +2212,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2231,7 +2231,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2252,7 +2252,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2269,7 +2269,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
@@ -2283,7 +2283,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
@@ -2297,7 +2297,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -2313,7 +2313,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
@@ -2327,7 +2327,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
@@ -2341,7 +2341,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
@@ -2357,7 +2357,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2374,7 +2374,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2390,7 +2390,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2405,7 +2405,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2421,7 +2421,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2439,7 +2439,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2456,7 +2456,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2472,7 +2472,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2490,7 +2490,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2507,7 +2507,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2523,7 +2523,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2541,7 +2541,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2559,7 +2559,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2577,7 +2577,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2595,7 +2595,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2614,7 +2614,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2632,7 +2632,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2650,7 +2650,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2668,7 +2668,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2687,7 +2687,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2708,7 +2708,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2725,7 +2725,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
@@ -2739,7 +2739,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
@@ -2753,7 +2753,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -2769,7 +2769,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
@@ -2783,7 +2783,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_strided_a) {
@@ -2797,7 +2797,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
@@ -2813,7 +2813,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2830,7 +2830,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2846,7 +2846,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2861,7 +2861,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2877,7 +2877,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2895,7 +2895,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2912,7 +2912,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2928,7 +2928,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2946,7 +2946,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2963,7 +2963,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2979,7 +2979,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2997,7 +2997,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3015,7 +3015,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3033,7 +3033,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3051,7 +3051,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3070,7 +3070,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3088,7 +3088,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3106,7 +3106,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3124,7 +3124,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3143,7 +3143,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3164,7 +3164,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3181,7 +3181,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
@@ -3195,7 +3195,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
@@ -3209,7 +3209,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -3225,7 +3225,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
@@ -3239,7 +3239,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_strided_a) {
@@ -3253,7 +3253,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
@@ -3269,7 +3269,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3286,7 +3286,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3302,7 +3302,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3317,7 +3317,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3333,7 +3333,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3351,7 +3351,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3368,7 +3368,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3384,7 +3384,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3402,7 +3402,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3419,7 +3419,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3435,7 +3435,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3453,7 +3453,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3471,7 +3471,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3489,7 +3489,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3507,7 +3507,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3526,7 +3526,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3544,7 +3544,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3562,7 +3562,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3580,7 +3580,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3599,7 +3599,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3620,7 +3620,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3637,7 +3637,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
@@ -3651,7 +3651,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
@@ -3665,7 +3665,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -3681,7 +3681,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
@@ -3695,7 +3695,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -3709,7 +3709,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -3725,7 +3725,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3742,7 +3742,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3758,7 +3758,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3773,7 +3773,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3789,7 +3789,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3807,7 +3807,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3824,7 +3824,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3840,7 +3840,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3858,7 +3858,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3875,7 +3875,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3891,7 +3891,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3909,7 +3909,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3927,7 +3927,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3945,7 +3945,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3963,7 +3963,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3982,7 +3982,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4000,7 +4000,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4018,7 +4018,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4036,7 +4036,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4055,7 +4055,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4076,7 +4076,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4093,7 +4093,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
@@ -4107,7 +4107,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
@@ -4121,7 +4121,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -4137,7 +4137,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
@@ -4151,7 +4151,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_strided_a) {
@@ -4165,7 +4165,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
@@ -4181,7 +4181,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4198,7 +4198,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4214,7 +4214,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4229,7 +4229,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4245,7 +4245,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4263,7 +4263,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4280,7 +4280,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4296,7 +4296,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4314,7 +4314,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4331,7 +4331,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4347,7 +4347,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4365,7 +4365,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4383,7 +4383,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4401,7 +4401,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4419,7 +4419,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4438,7 +4438,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4456,7 +4456,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4474,7 +4474,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4492,7 +4492,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4511,7 +4511,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4532,7 +4532,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4549,7 +4549,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
@@ -4563,7 +4563,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
@@ -4577,7 +4577,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -4593,7 +4593,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, strided_cn) {
@@ -4607,7 +4607,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_strided_a) {
@@ -4621,7 +4621,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -4637,7 +4637,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4654,7 +4654,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4670,7 +4670,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4685,7 +4685,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4701,7 +4701,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4719,7 +4719,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4736,7 +4736,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4752,7 +4752,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4770,7 +4770,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4787,7 +4787,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4803,7 +4803,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4821,7 +4821,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4839,7 +4839,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4857,7 +4857,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4875,7 +4875,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4894,7 +4894,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4912,7 +4912,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4930,7 +4930,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4948,7 +4948,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4967,7 +4967,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4988,7 +4988,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5005,7 +5005,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, qmax) {
@@ -5019,7 +5019,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MULL_PADAL, strided_cm) {
@@ -5033,7 +5033,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -5049,7 +5049,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cn) {
@@ -5063,7 +5063,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -5077,7 +5077,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -5093,7 +5093,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5110,7 +5110,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5126,7 +5126,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5141,7 +5141,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5157,7 +5157,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5175,7 +5175,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5192,7 +5192,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5208,7 +5208,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5226,7 +5226,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5243,7 +5243,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5259,7 +5259,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5277,7 +5277,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5295,7 +5295,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5313,7 +5313,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5331,7 +5331,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5350,7 +5350,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5368,7 +5368,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5386,7 +5386,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5404,7 +5404,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5423,7 +5423,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5444,7 +5444,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5461,7 +5461,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, qmax) {
@@ -5475,7 +5475,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cm) {
@@ -5489,7 +5489,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -5505,7 +5505,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cn) {
@@ -5519,7 +5519,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -5533,7 +5533,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -5549,7 +5549,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5566,7 +5566,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5582,7 +5582,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5597,7 +5597,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5613,7 +5613,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5631,7 +5631,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5648,7 +5648,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5664,7 +5664,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5682,7 +5682,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5699,7 +5699,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5715,7 +5715,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5733,7 +5733,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5751,7 +5751,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5769,7 +5769,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5787,7 +5787,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5806,7 +5806,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5824,7 +5824,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5842,7 +5842,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5860,7 +5860,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5879,7 +5879,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5900,7 +5900,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5917,7 +5917,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, qmax) {
@@ -5931,7 +5931,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cm) {
@@ -5945,7 +5945,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -5961,7 +5961,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cn) {
@@ -5975,7 +5975,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -5989,7 +5989,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -6005,7 +6005,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6022,7 +6022,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6038,7 +6038,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6053,7 +6053,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6069,7 +6069,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6087,7 +6087,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6104,7 +6104,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6120,7 +6120,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6138,7 +6138,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6155,7 +6155,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6171,7 +6171,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6189,7 +6189,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6207,7 +6207,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6225,7 +6225,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6243,7 +6243,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6262,7 +6262,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6280,7 +6280,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6298,7 +6298,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6316,7 +6316,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6335,7 +6335,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6356,7 +6356,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6373,7 +6373,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, qmax) {
@@ -6387,7 +6387,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cm) {
@@ -6401,7 +6401,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -6417,7 +6417,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cn) {
@@ -6431,7 +6431,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -6445,7 +6445,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -6461,7 +6461,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6478,7 +6478,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6494,7 +6494,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6509,7 +6509,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6525,7 +6525,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6543,7 +6543,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6560,7 +6560,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6576,7 +6576,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6594,7 +6594,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6611,7 +6611,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6627,7 +6627,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6645,7 +6645,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6663,7 +6663,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6681,7 +6681,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6699,7 +6699,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6718,7 +6718,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6736,7 +6736,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6754,7 +6754,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6772,7 +6772,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6791,7 +6791,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6812,7 +6812,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6829,7 +6829,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, qmax) {
@@ -6843,7 +6843,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cm) {
@@ -6857,7 +6857,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -6873,7 +6873,7 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cn) {
@@ -6887,7 +6887,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -6901,7 +6901,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -6917,7 +6917,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6934,7 +6934,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6950,7 +6950,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6965,7 +6965,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6981,7 +6981,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6999,7 +6999,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7016,7 +7016,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7032,7 +7032,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7050,7 +7050,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7067,7 +7067,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7083,7 +7083,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7101,7 +7101,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7119,7 +7119,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7137,7 +7137,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7155,7 +7155,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7174,7 +7174,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7192,7 +7192,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7210,7 +7210,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7228,7 +7228,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7247,7 +7247,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7268,7 +7268,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7285,7 +7285,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, qmax) {
@@ -7299,7 +7299,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cm) {
@@ -7313,7 +7313,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -7329,7 +7329,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cn) {
@@ -7343,7 +7343,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -7357,7 +7357,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -7373,7 +7373,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7390,7 +7390,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7406,7 +7406,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7421,7 +7421,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7437,7 +7437,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7455,7 +7455,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7472,7 +7472,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7488,7 +7488,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7506,7 +7506,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7523,7 +7523,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7539,7 +7539,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7557,7 +7557,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7575,7 +7575,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7593,7 +7593,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7611,7 +7611,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7630,7 +7630,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7648,7 +7648,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7666,7 +7666,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7684,7 +7684,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7703,7 +7703,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7724,7 +7724,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7741,7 +7741,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, qmax) {
@@ -7755,7 +7755,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cm) {
@@ -7769,7 +7769,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -7785,7 +7785,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cn) {
@@ -7799,7 +7799,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -7813,7 +7813,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -7829,7 +7829,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7846,7 +7846,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7862,7 +7862,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7877,7 +7877,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7893,7 +7893,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7911,7 +7911,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7928,7 +7928,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7944,7 +7944,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7962,7 +7962,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7979,7 +7979,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7995,7 +7995,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8013,7 +8013,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8031,7 +8031,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8049,7 +8049,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8067,7 +8067,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8086,7 +8086,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8104,7 +8104,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8122,7 +8122,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8140,7 +8140,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8159,7 +8159,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8180,7 +8180,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8197,7 +8197,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, qmax) {
@@ -8211,7 +8211,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cm) {
@@ -8225,7 +8225,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -8241,7 +8241,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cn) {
@@ -8255,7 +8255,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -8269,7 +8269,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -8285,7 +8285,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8302,7 +8302,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8318,7 +8318,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8333,7 +8333,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8349,7 +8349,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8367,7 +8367,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8384,7 +8384,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8400,7 +8400,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8418,7 +8418,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8435,7 +8435,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8451,7 +8451,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8469,7 +8469,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8487,7 +8487,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8505,7 +8505,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8523,7 +8523,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8542,7 +8542,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8560,7 +8560,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8578,7 +8578,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8596,7 +8596,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8615,7 +8615,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8636,7 +8636,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8653,7 +8653,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, qmax) {
@@ -8667,7 +8667,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cm) {
@@ -8681,7 +8681,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -8697,7 +8697,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cn) {
@@ -8711,7 +8711,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -8725,7 +8725,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -8741,7 +8741,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8758,7 +8758,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8774,7 +8774,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8789,7 +8789,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8805,7 +8805,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8823,7 +8823,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8840,7 +8840,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8856,7 +8856,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8874,7 +8874,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8891,7 +8891,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8907,7 +8907,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8925,7 +8925,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8943,7 +8943,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8961,7 +8961,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8979,7 +8979,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8998,7 +8998,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9016,7 +9016,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9034,7 +9034,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9052,7 +9052,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9071,7 +9071,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9092,7 +9092,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9109,7 +9109,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, qmax) {
@@ -9123,7 +9123,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cm) {
@@ -9137,7 +9137,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -9153,7 +9153,7 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cn) {
@@ -9167,7 +9167,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
@@ -9181,7 +9181,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -9197,7 +9197,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9214,7 +9214,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9230,7 +9230,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9245,7 +9245,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9261,7 +9261,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9279,7 +9279,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9296,7 +9296,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9312,7 +9312,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9330,7 +9330,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9347,7 +9347,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9363,7 +9363,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9381,7 +9381,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9399,7 +9399,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9417,7 +9417,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9435,7 +9435,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9454,7 +9454,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9472,7 +9472,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9490,7 +9490,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9508,7 +9508,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9527,7 +9527,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9548,7 +9548,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9565,7 +9565,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, qmax) {
@@ -9579,7 +9579,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cm) {
@@ -9593,7 +9593,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -9609,7 +9609,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -9623,7 +9623,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -9637,7 +9637,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -9653,7 +9653,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9670,7 +9670,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9686,7 +9686,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9701,7 +9701,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9717,7 +9717,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9735,7 +9735,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9752,7 +9752,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9768,7 +9768,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9786,7 +9786,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9803,7 +9803,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9819,7 +9819,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9837,7 +9837,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9855,7 +9855,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9873,7 +9873,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9891,7 +9891,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9910,7 +9910,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9928,7 +9928,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9946,7 +9946,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9964,7 +9964,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9983,7 +9983,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10004,7 +10004,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10021,7 +10021,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -10035,7 +10035,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -10049,7 +10049,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -10065,7 +10065,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -10079,7 +10079,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -10093,7 +10093,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -10109,7 +10109,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10126,7 +10126,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10142,7 +10142,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10157,7 +10157,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10173,7 +10173,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10191,7 +10191,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10208,7 +10208,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10224,7 +10224,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10242,7 +10242,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10259,7 +10259,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10275,7 +10275,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10293,7 +10293,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10311,7 +10311,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10329,7 +10329,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10347,7 +10347,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10366,7 +10366,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10384,7 +10384,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10402,7 +10402,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10420,7 +10420,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10439,7 +10439,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10460,7 +10460,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10477,7 +10477,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -10491,7 +10491,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -10505,7 +10505,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -10521,7 +10521,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -10535,7 +10535,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -10549,7 +10549,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -10565,7 +10565,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10582,7 +10582,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10598,7 +10598,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10613,7 +10613,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10629,7 +10629,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10647,7 +10647,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10664,7 +10664,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10680,7 +10680,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10698,7 +10698,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10715,7 +10715,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10731,7 +10731,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10749,7 +10749,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10767,7 +10767,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10785,7 +10785,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10803,7 +10803,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10822,7 +10822,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10840,7 +10840,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10858,7 +10858,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10876,7 +10876,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10895,7 +10895,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10916,7 +10916,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10933,7 +10933,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -10947,7 +10947,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -10961,7 +10961,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -10977,7 +10977,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -10991,7 +10991,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -11005,7 +11005,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -11021,7 +11021,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11038,7 +11038,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11054,7 +11054,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11069,7 +11069,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11085,7 +11085,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11103,7 +11103,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11120,7 +11120,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11136,7 +11136,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11154,7 +11154,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11171,7 +11171,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11187,7 +11187,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11205,7 +11205,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11223,7 +11223,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11241,7 +11241,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11259,7 +11259,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11278,7 +11278,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11296,7 +11296,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11314,7 +11314,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11332,7 +11332,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11351,7 +11351,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11372,7 +11372,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11389,7 +11389,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -11403,7 +11403,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -11417,7 +11417,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -11433,7 +11433,7 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -11447,7 +11447,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -11461,7 +11461,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -11477,7 +11477,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11494,7 +11494,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11510,7 +11510,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11525,7 +11525,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11541,7 +11541,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11559,7 +11559,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11576,7 +11576,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11592,7 +11592,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11610,7 +11610,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11627,7 +11627,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11643,7 +11643,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11661,7 +11661,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11679,7 +11679,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11697,7 +11697,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11715,7 +11715,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11734,7 +11734,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11752,7 +11752,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11770,7 +11770,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11788,7 +11788,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11807,7 +11807,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11828,7 +11828,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11845,7 +11845,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -11859,7 +11859,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -11873,7 +11873,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -11889,7 +11889,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -11903,7 +11903,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -11917,7 +11917,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -11933,7 +11933,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11950,7 +11950,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11966,7 +11966,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11981,7 +11981,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11997,7 +11997,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12015,7 +12015,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12032,7 +12032,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12048,7 +12048,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12066,7 +12066,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12083,7 +12083,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12099,7 +12099,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12117,7 +12117,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12135,7 +12135,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12153,7 +12153,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12171,7 +12171,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12190,7 +12190,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12208,7 +12208,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12226,7 +12226,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12244,7 +12244,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12263,7 +12263,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12284,7 +12284,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12301,7 +12301,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -12315,7 +12315,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -12329,7 +12329,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -12345,7 +12345,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -12359,7 +12359,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -12373,7 +12373,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -12389,7 +12389,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12406,7 +12406,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12422,7 +12422,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12437,7 +12437,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12453,7 +12453,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12471,7 +12471,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12488,7 +12488,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12504,7 +12504,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12522,7 +12522,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12539,7 +12539,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12555,7 +12555,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12573,7 +12573,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12591,7 +12591,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12609,7 +12609,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12627,7 +12627,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12646,7 +12646,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12664,7 +12664,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12682,7 +12682,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12700,7 +12700,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12719,7 +12719,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12740,7 +12740,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12757,7 +12757,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -12771,7 +12771,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -12785,7 +12785,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -12801,7 +12801,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -12815,7 +12815,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -12829,7 +12829,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -12845,7 +12845,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12862,7 +12862,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12878,7 +12878,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12893,7 +12893,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12909,7 +12909,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12927,7 +12927,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12944,7 +12944,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12960,7 +12960,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12978,7 +12978,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12995,7 +12995,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13011,7 +13011,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13029,7 +13029,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13047,7 +13047,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13065,7 +13065,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13083,7 +13083,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13102,7 +13102,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13120,7 +13120,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13138,7 +13138,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13156,7 +13156,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13175,7 +13175,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13196,7 +13196,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13213,7 +13213,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -13227,7 +13227,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -13241,7 +13241,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -13257,7 +13257,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -13271,7 +13271,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -13285,7 +13285,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -13301,7 +13301,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13318,7 +13318,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13334,7 +13334,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13349,7 +13349,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13365,7 +13365,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13383,7 +13383,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13400,7 +13400,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13416,7 +13416,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13434,7 +13434,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13451,7 +13451,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13467,7 +13467,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13485,7 +13485,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13503,7 +13503,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13521,7 +13521,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13539,7 +13539,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13558,7 +13558,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13576,7 +13576,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13594,7 +13594,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13612,7 +13612,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13631,7 +13631,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13652,7 +13652,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13669,7 +13669,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -13683,7 +13683,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -13697,7 +13697,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -13713,7 +13713,7 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -13727,7 +13727,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_strided_a) {
@@ -13741,7 +13741,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -13757,7 +13757,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13774,7 +13774,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13790,7 +13790,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13805,7 +13805,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13821,7 +13821,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13839,7 +13839,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13856,7 +13856,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13872,7 +13872,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13890,7 +13890,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13907,7 +13907,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13923,7 +13923,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13941,7 +13941,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13959,7 +13959,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13977,7 +13977,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13995,7 +13995,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14014,7 +14014,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14032,7 +14032,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14050,7 +14050,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14068,7 +14068,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14087,7 +14087,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14108,7 +14108,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14125,7 +14125,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -14139,7 +14139,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -14153,7 +14153,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -14169,7 +14169,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -14183,7 +14183,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
@@ -14197,7 +14197,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -14213,7 +14213,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14230,7 +14230,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14246,7 +14246,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14261,7 +14261,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14277,7 +14277,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14295,7 +14295,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14312,7 +14312,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14328,7 +14328,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14346,7 +14346,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14363,7 +14363,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14379,7 +14379,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14397,7 +14397,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14415,7 +14415,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14433,7 +14433,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14451,7 +14451,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14470,7 +14470,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14488,7 +14488,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14506,7 +14506,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14524,7 +14524,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14543,7 +14543,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14564,7 +14564,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14581,7 +14581,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, qmax) {
@@ -14595,7 +14595,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -14609,7 +14609,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -14625,7 +14625,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -14639,7 +14639,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
@@ -14653,7 +14653,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -14669,7 +14669,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14686,7 +14686,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14702,7 +14702,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14717,7 +14717,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14733,7 +14733,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14751,7 +14751,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14768,7 +14768,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14784,7 +14784,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14802,7 +14802,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14819,7 +14819,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14835,7 +14835,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14853,7 +14853,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14871,7 +14871,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14889,7 +14889,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14907,7 +14907,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14926,7 +14926,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14944,7 +14944,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14962,7 +14962,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14980,7 +14980,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14999,7 +14999,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15020,7 +15020,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15037,7 +15037,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, qmax) {
@@ -15051,7 +15051,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -15065,7 +15065,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -15081,7 +15081,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -15095,7 +15095,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
@@ -15109,7 +15109,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -15125,7 +15125,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15142,7 +15142,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15158,7 +15158,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15173,7 +15173,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15189,7 +15189,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15207,7 +15207,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15224,7 +15224,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15240,7 +15240,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15258,7 +15258,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15275,7 +15275,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15291,7 +15291,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15309,7 +15309,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15327,7 +15327,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15345,7 +15345,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15363,7 +15363,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15382,7 +15382,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15400,7 +15400,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15418,7 +15418,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15436,7 +15436,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15455,7 +15455,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15476,7 +15476,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15493,7 +15493,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, qmax) {
@@ -15507,7 +15507,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -15521,7 +15521,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -15537,7 +15537,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -15551,7 +15551,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
@@ -15565,7 +15565,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -15581,7 +15581,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15598,7 +15598,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15614,7 +15614,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15629,7 +15629,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15645,7 +15645,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15663,7 +15663,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15680,7 +15680,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15696,7 +15696,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15714,7 +15714,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15731,7 +15731,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15747,7 +15747,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15765,7 +15765,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15783,7 +15783,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15801,7 +15801,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15819,7 +15819,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15838,7 +15838,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15856,7 +15856,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15874,7 +15874,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15892,7 +15892,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15911,7 +15911,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15932,7 +15932,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15949,7 +15949,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, qmax) {
@@ -15963,7 +15963,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -15977,7 +15977,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -15993,7 +15993,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -16007,7 +16007,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
@@ -16021,7 +16021,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -16037,7 +16037,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16054,7 +16054,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16070,7 +16070,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16085,7 +16085,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16101,7 +16101,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16119,7 +16119,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16136,7 +16136,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16152,7 +16152,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16170,7 +16170,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16187,7 +16187,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16203,7 +16203,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16221,7 +16221,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16239,7 +16239,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16257,7 +16257,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16275,7 +16275,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16294,7 +16294,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16312,7 +16312,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16330,7 +16330,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16348,7 +16348,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16367,7 +16367,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16388,7 +16388,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16405,7 +16405,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, qmax) {
@@ -16419,7 +16419,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -16433,7 +16433,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -16449,7 +16449,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -16463,7 +16463,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
@@ -16477,7 +16477,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -16493,7 +16493,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16510,7 +16510,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16526,7 +16526,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16541,7 +16541,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16557,7 +16557,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16575,7 +16575,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16592,7 +16592,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16608,7 +16608,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16626,7 +16626,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16643,7 +16643,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16659,7 +16659,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16677,7 +16677,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16695,7 +16695,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16713,7 +16713,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16731,7 +16731,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16750,7 +16750,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16768,7 +16768,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16786,7 +16786,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16804,7 +16804,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16823,7 +16823,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16844,7 +16844,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16861,7 +16861,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, qmax) {
@@ -16875,7 +16875,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -16889,7 +16889,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -16905,7 +16905,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -16919,7 +16919,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
@@ -16933,7 +16933,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -16949,7 +16949,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16966,7 +16966,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16982,7 +16982,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16997,7 +16997,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17013,7 +17013,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17031,7 +17031,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17048,7 +17048,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17064,7 +17064,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17082,7 +17082,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17099,7 +17099,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17115,7 +17115,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17133,7 +17133,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17151,7 +17151,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17169,7 +17169,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17187,7 +17187,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17206,7 +17206,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17224,7 +17224,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17242,7 +17242,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17260,7 +17260,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17279,7 +17279,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17300,7 +17300,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17317,7 +17317,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, qmax) {
@@ -17331,7 +17331,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -17345,7 +17345,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -17361,7 +17361,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -17375,7 +17375,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
@@ -17389,7 +17389,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -17405,7 +17405,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17422,7 +17422,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17438,7 +17438,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17453,7 +17453,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17469,7 +17469,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17487,7 +17487,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17504,7 +17504,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17520,7 +17520,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17538,7 +17538,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17555,7 +17555,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17571,7 +17571,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17589,7 +17589,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17607,7 +17607,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17625,7 +17625,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17643,7 +17643,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17662,7 +17662,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17680,7 +17680,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17698,7 +17698,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17716,7 +17716,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17735,7 +17735,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17756,7 +17756,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17773,7 +17773,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, qmax) {
@@ -17787,7 +17787,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -17801,7 +17801,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -17817,7 +17817,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -17831,7 +17831,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
@@ -17845,7 +17845,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -17861,7 +17861,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17878,7 +17878,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17894,7 +17894,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17909,7 +17909,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17925,7 +17925,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17943,7 +17943,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17960,7 +17960,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17976,7 +17976,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17994,7 +17994,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18011,7 +18011,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18027,7 +18027,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18045,7 +18045,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18063,7 +18063,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18081,7 +18081,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18099,7 +18099,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18118,7 +18118,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18136,7 +18136,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18154,7 +18154,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18172,7 +18172,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18191,7 +18191,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18212,7 +18212,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18229,7 +18229,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -18243,7 +18243,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -18257,7 +18257,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -18273,7 +18273,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -18287,7 +18287,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
@@ -18301,7 +18301,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -18317,7 +18317,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18334,7 +18334,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18350,7 +18350,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18365,7 +18365,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18381,7 +18381,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18399,7 +18399,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18416,7 +18416,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18432,7 +18432,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18450,7 +18450,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18467,7 +18467,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18483,7 +18483,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18501,7 +18501,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18519,7 +18519,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18537,7 +18537,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18555,7 +18555,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18574,7 +18574,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18592,7 +18592,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18610,7 +18610,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18628,7 +18628,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18647,7 +18647,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18668,7 +18668,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18685,7 +18685,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -18699,7 +18699,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -18713,7 +18713,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -18729,7 +18729,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -18743,7 +18743,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
@@ -18757,7 +18757,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -18773,7 +18773,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18790,7 +18790,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18806,7 +18806,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18821,7 +18821,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18837,7 +18837,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18855,7 +18855,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18872,7 +18872,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18888,7 +18888,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18906,7 +18906,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18923,7 +18923,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18939,7 +18939,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18957,7 +18957,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18975,7 +18975,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18993,7 +18993,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19011,7 +19011,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19030,7 +19030,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19048,7 +19048,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19066,7 +19066,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19084,7 +19084,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19103,7 +19103,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19124,7 +19124,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19141,7 +19141,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -19155,7 +19155,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -19169,7 +19169,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -19185,7 +19185,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -19199,7 +19199,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
@@ -19213,7 +19213,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -19229,7 +19229,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19246,7 +19246,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19262,7 +19262,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19277,7 +19277,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19293,7 +19293,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19311,7 +19311,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19328,7 +19328,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19344,7 +19344,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19362,7 +19362,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19379,7 +19379,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19395,7 +19395,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19413,7 +19413,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19431,7 +19431,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19449,7 +19449,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19467,7 +19467,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19486,7 +19486,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19504,7 +19504,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19522,7 +19522,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19540,7 +19540,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19559,7 +19559,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19580,7 +19580,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19597,7 +19597,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -19611,7 +19611,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -19625,7 +19625,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -19641,7 +19641,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -19655,7 +19655,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
@@ -19669,7 +19669,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -19685,7 +19685,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19702,7 +19702,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19718,7 +19718,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19733,7 +19733,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19749,7 +19749,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19767,7 +19767,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19784,7 +19784,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19800,7 +19800,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19818,7 +19818,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19835,7 +19835,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19851,7 +19851,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19869,7 +19869,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19887,7 +19887,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19905,7 +19905,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19923,7 +19923,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19942,7 +19942,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19960,7 +19960,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19978,7 +19978,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19996,7 +19996,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20015,7 +20015,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20036,7 +20036,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20053,7 +20053,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -20067,7 +20067,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -20081,7 +20081,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -20097,7 +20097,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -20111,7 +20111,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
@@ -20125,7 +20125,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -20141,7 +20141,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20158,7 +20158,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20174,7 +20174,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20189,7 +20189,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20205,7 +20205,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20223,7 +20223,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20240,7 +20240,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20256,7 +20256,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20274,7 +20274,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20291,7 +20291,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20307,7 +20307,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20325,7 +20325,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20343,7 +20343,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20361,7 +20361,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20379,7 +20379,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20398,7 +20398,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20416,7 +20416,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20434,7 +20434,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20452,7 +20452,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20471,7 +20471,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20492,7 +20492,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20509,7 +20509,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -20523,7 +20523,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -20537,7 +20537,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -20553,7 +20553,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -20567,7 +20567,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
@@ -20581,7 +20581,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -20597,7 +20597,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20614,7 +20614,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20630,7 +20630,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20645,7 +20645,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20661,7 +20661,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20679,7 +20679,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20696,7 +20696,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20712,7 +20712,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20730,7 +20730,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20747,7 +20747,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20763,7 +20763,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20781,7 +20781,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20799,7 +20799,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20817,7 +20817,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20835,7 +20835,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20854,7 +20854,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20872,7 +20872,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20890,7 +20890,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20908,7 +20908,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20927,7 +20927,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20948,7 +20948,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20965,7 +20965,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -20979,7 +20979,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -20993,7 +20993,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -21009,7 +21009,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -21023,7 +21023,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_strided_a) {
@@ -21037,7 +21037,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -21053,7 +21053,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21070,7 +21070,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21086,7 +21086,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21101,7 +21101,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21117,7 +21117,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21135,7 +21135,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21152,7 +21152,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21168,7 +21168,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21186,7 +21186,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21203,7 +21203,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21219,7 +21219,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21237,7 +21237,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21255,7 +21255,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21273,7 +21273,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21291,7 +21291,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21310,7 +21310,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21328,7 +21328,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21346,7 +21346,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21364,7 +21364,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21383,7 +21383,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21404,7 +21404,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21421,7 +21421,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -21435,7 +21435,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -21449,7 +21449,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -21465,7 +21465,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -21479,7 +21479,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
@@ -21493,7 +21493,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -21509,7 +21509,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21526,7 +21526,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21542,7 +21542,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21557,7 +21557,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21573,7 +21573,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21591,7 +21591,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21608,7 +21608,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21624,7 +21624,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21642,7 +21642,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21659,7 +21659,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21675,7 +21675,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21693,7 +21693,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21711,7 +21711,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21729,7 +21729,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21747,7 +21747,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21766,7 +21766,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21784,7 +21784,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21802,7 +21802,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21820,7 +21820,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21839,7 +21839,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21860,7 +21860,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21877,7 +21877,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -21891,7 +21891,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -21905,7 +21905,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -21921,7 +21921,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -21935,7 +21935,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
@@ -21949,7 +21949,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -21965,7 +21965,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21982,7 +21982,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21998,7 +21998,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22013,7 +22013,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22029,7 +22029,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22047,7 +22047,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22064,7 +22064,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22080,7 +22080,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22098,7 +22098,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22115,7 +22115,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22131,7 +22131,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22149,7 +22149,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22167,7 +22167,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22185,7 +22185,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22203,7 +22203,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22222,7 +22222,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22240,7 +22240,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22258,7 +22258,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22276,7 +22276,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22295,7 +22295,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22316,7 +22316,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22333,7 +22333,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -22347,7 +22347,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -22361,7 +22361,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -22377,7 +22377,7 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -22391,7 +22391,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
@@ -22405,7 +22405,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -22421,7 +22421,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22438,7 +22438,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22454,7 +22454,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22469,7 +22469,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22485,7 +22485,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22503,7 +22503,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22520,7 +22520,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22536,7 +22536,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22554,7 +22554,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22571,7 +22571,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22587,7 +22587,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22605,7 +22605,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22623,7 +22623,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22641,7 +22641,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22659,7 +22659,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22678,7 +22678,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22696,7 +22696,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22714,7 +22714,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22732,7 +22732,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22751,7 +22751,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22772,7 +22772,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22789,7 +22789,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -22803,7 +22803,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -22817,7 +22817,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -22833,7 +22833,7 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -22847,7 +22847,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
@@ -22861,7 +22861,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -22877,7 +22877,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22894,7 +22894,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22910,7 +22910,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22925,7 +22925,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22941,7 +22941,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22959,7 +22959,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22976,7 +22976,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22992,7 +22992,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23010,7 +23010,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23027,7 +23027,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23043,7 +23043,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23061,7 +23061,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23079,7 +23079,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23097,7 +23097,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23115,7 +23115,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23134,7 +23134,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23152,7 +23152,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23170,7 +23170,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23188,7 +23188,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23207,7 +23207,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23228,7 +23228,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23245,7 +23245,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -23259,7 +23259,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -23273,7 +23273,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -23289,7 +23289,7 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -23303,7 +23303,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
@@ -23317,7 +23317,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -23333,7 +23333,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23350,7 +23350,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23366,7 +23366,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23381,7 +23381,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23397,7 +23397,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23415,7 +23415,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23432,7 +23432,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23448,7 +23448,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23466,7 +23466,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23483,7 +23483,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23499,7 +23499,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23517,7 +23517,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23535,7 +23535,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23553,7 +23553,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23571,7 +23571,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23590,7 +23590,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23608,7 +23608,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23626,7 +23626,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23644,7 +23644,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23663,7 +23663,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23684,7 +23684,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23701,7 +23701,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -23715,7 +23715,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -23729,7 +23729,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -23745,7 +23745,7 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -23759,7 +23759,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
@@ -23773,7 +23773,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -23789,7 +23789,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23806,7 +23806,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23822,7 +23822,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23837,7 +23837,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23853,7 +23853,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23871,7 +23871,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23888,7 +23888,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23904,7 +23904,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23922,7 +23922,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23939,7 +23939,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23955,7 +23955,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23973,7 +23973,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23991,7 +23991,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24009,7 +24009,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24027,7 +24027,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24046,7 +24046,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24064,7 +24064,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24082,7 +24082,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24100,7 +24100,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24119,7 +24119,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24140,7 +24140,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24157,7 +24157,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -24171,7 +24171,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -24185,7 +24185,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -24201,7 +24201,7 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -24215,7 +24215,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
@@ -24229,7 +24229,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -24245,7 +24245,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24262,7 +24262,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24278,7 +24278,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24293,7 +24293,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24309,7 +24309,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24327,7 +24327,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24344,7 +24344,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24360,7 +24360,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24378,7 +24378,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24395,7 +24395,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24411,7 +24411,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24429,7 +24429,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24447,7 +24447,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24465,7 +24465,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24483,7 +24483,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24502,7 +24502,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24520,7 +24520,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24538,7 +24538,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24556,7 +24556,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24575,7 +24575,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24596,7 +24596,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24613,7 +24613,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -24627,7 +24627,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -24641,7 +24641,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -24657,7 +24657,7 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -24671,7 +24671,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_strided_a) {
@@ -24685,7 +24685,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -24701,7 +24701,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24718,7 +24718,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24734,7 +24734,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24749,7 +24749,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24765,7 +24765,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24783,7 +24783,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24800,7 +24800,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24816,7 +24816,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24834,7 +24834,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24851,7 +24851,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24867,7 +24867,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24885,7 +24885,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24903,7 +24903,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24921,7 +24921,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24939,7 +24939,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24958,7 +24958,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24976,7 +24976,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24994,7 +24994,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25012,7 +25012,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25031,7 +25031,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25052,7 +25052,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25069,7 +25069,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -25083,7 +25083,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -25097,7 +25097,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -25113,7 +25113,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cn) {
@@ -25127,7 +25127,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
@@ -25141,7 +25141,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -25157,7 +25157,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25174,7 +25174,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25190,7 +25190,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25205,7 +25205,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25221,7 +25221,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25239,7 +25239,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25256,7 +25256,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25272,7 +25272,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25290,7 +25290,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25307,7 +25307,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25323,7 +25323,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25341,7 +25341,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25359,7 +25359,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25377,7 +25377,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25395,7 +25395,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25414,7 +25414,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25432,7 +25432,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25450,7 +25450,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25468,7 +25468,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25487,7 +25487,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25508,7 +25508,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25525,7 +25525,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, qmax) {
@@ -25539,7 +25539,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cm) {
@@ -25553,7 +25553,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -25569,7 +25569,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cn) {
@@ -25583,7 +25583,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
@@ -25597,7 +25597,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -25613,7 +25613,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25630,7 +25630,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25646,7 +25646,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25661,7 +25661,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25677,7 +25677,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25695,7 +25695,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25712,7 +25712,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25728,7 +25728,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25746,7 +25746,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25763,7 +25763,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25779,7 +25779,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25797,7 +25797,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25815,7 +25815,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25833,7 +25833,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25851,7 +25851,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25870,7 +25870,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25888,7 +25888,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25906,7 +25906,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25924,7 +25924,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25943,7 +25943,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25964,7 +25964,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25981,7 +25981,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, qmax) {
@@ -25995,7 +25995,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cm) {
@@ -26009,7 +26009,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -26025,7 +26025,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cn) {
@@ -26039,7 +26039,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
@@ -26053,7 +26053,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -26069,7 +26069,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26086,7 +26086,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26102,7 +26102,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26117,7 +26117,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26133,7 +26133,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26151,7 +26151,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26168,7 +26168,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26184,7 +26184,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26202,7 +26202,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26219,7 +26219,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26235,7 +26235,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26253,7 +26253,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26271,7 +26271,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26289,7 +26289,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26307,7 +26307,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26326,7 +26326,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26344,7 +26344,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26362,7 +26362,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26380,7 +26380,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26399,7 +26399,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26420,7 +26420,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26437,7 +26437,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, qmax) {
@@ -26451,7 +26451,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cm) {
@@ -26465,7 +26465,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -26481,7 +26481,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cn) {
@@ -26495,7 +26495,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
@@ -26509,7 +26509,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -26525,7 +26525,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26542,7 +26542,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26558,7 +26558,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26573,7 +26573,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26589,7 +26589,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26607,7 +26607,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26624,7 +26624,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26640,7 +26640,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26658,7 +26658,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26675,7 +26675,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26691,7 +26691,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26709,7 +26709,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26727,7 +26727,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26745,7 +26745,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26763,7 +26763,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26782,7 +26782,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26800,7 +26800,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26818,7 +26818,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26836,7 +26836,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26855,7 +26855,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26876,7 +26876,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26893,7 +26893,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, qmax) {
@@ -26907,7 +26907,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cm) {
@@ -26921,7 +26921,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -26937,7 +26937,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cn) {
@@ -26951,7 +26951,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
@@ -26965,7 +26965,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -26981,7 +26981,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26998,7 +26998,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27014,7 +27014,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27029,7 +27029,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27045,7 +27045,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27063,7 +27063,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27080,7 +27080,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27096,7 +27096,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27114,7 +27114,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27131,7 +27131,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27147,7 +27147,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27165,7 +27165,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27183,7 +27183,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27201,7 +27201,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27219,7 +27219,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27238,7 +27238,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27256,7 +27256,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27274,7 +27274,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27292,7 +27292,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27311,7 +27311,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27332,7 +27332,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27349,7 +27349,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, qmax) {
@@ -27363,7 +27363,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cm) {
@@ -27377,7 +27377,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -27393,7 +27393,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cn) {
@@ -27407,7 +27407,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
@@ -27421,7 +27421,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -27437,7 +27437,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27454,7 +27454,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27470,7 +27470,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27485,7 +27485,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27501,7 +27501,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27519,7 +27519,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27536,7 +27536,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27552,7 +27552,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27570,7 +27570,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27587,7 +27587,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27603,7 +27603,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27621,7 +27621,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27639,7 +27639,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27657,7 +27657,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27675,7 +27675,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27694,7 +27694,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27712,7 +27712,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27730,7 +27730,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27748,7 +27748,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27767,7 +27767,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27788,7 +27788,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27805,7 +27805,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, qmax) {
@@ -27819,7 +27819,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cm) {
@@ -27833,7 +27833,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -27849,7 +27849,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cn) {
@@ -27863,7 +27863,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
@@ -27877,7 +27877,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -27893,7 +27893,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27910,7 +27910,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27926,7 +27926,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27941,7 +27941,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27957,7 +27957,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27975,7 +27975,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27992,7 +27992,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28008,7 +28008,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28026,7 +28026,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28043,7 +28043,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28059,7 +28059,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28077,7 +28077,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28095,7 +28095,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28113,7 +28113,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28131,7 +28131,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28150,7 +28150,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28168,7 +28168,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28186,7 +28186,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28204,7 +28204,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28223,7 +28223,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28244,7 +28244,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28261,7 +28261,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, qmax) {
@@ -28275,7 +28275,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cm) {
@@ -28289,7 +28289,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -28305,7 +28305,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cn) {
@@ -28319,7 +28319,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_strided_a) {
@@ -28333,7 +28333,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -28349,7 +28349,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28366,7 +28366,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28382,7 +28382,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28397,7 +28397,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28413,7 +28413,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28431,7 +28431,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28448,7 +28448,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28464,7 +28464,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28482,7 +28482,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28499,7 +28499,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28515,7 +28515,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28533,7 +28533,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28551,7 +28551,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28569,7 +28569,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28587,7 +28587,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28606,7 +28606,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28624,7 +28624,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28642,7 +28642,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28660,7 +28660,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28679,7 +28679,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28700,7 +28700,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28717,7 +28717,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, qmax) {
@@ -28731,7 +28731,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cm) {
@@ -28745,7 +28745,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -28761,7 +28761,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cn) {
@@ -28775,7 +28775,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -28789,7 +28789,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -28805,7 +28805,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28822,7 +28822,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28838,7 +28838,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28853,7 +28853,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28869,7 +28869,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28887,7 +28887,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28904,7 +28904,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28920,7 +28920,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28938,7 +28938,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28955,7 +28955,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28971,7 +28971,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28989,7 +28989,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29007,7 +29007,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29025,7 +29025,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29043,7 +29043,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29062,7 +29062,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29080,7 +29080,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29098,7 +29098,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29116,7 +29116,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29135,7 +29135,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29156,7 +29156,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29173,7 +29173,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, qmax) {
@@ -29187,7 +29187,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cm) {
@@ -29201,7 +29201,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -29217,7 +29217,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cn) {
@@ -29231,7 +29231,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -29245,7 +29245,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -29261,7 +29261,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29278,7 +29278,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29294,7 +29294,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29309,7 +29309,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29325,7 +29325,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29343,7 +29343,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29360,7 +29360,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29376,7 +29376,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29394,7 +29394,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29411,7 +29411,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29427,7 +29427,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29445,7 +29445,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29463,7 +29463,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29481,7 +29481,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29499,7 +29499,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29518,7 +29518,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29536,7 +29536,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29554,7 +29554,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29572,7 +29572,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29591,7 +29591,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29612,7 +29612,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29629,7 +29629,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, qmax) {
@@ -29643,7 +29643,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cm) {
@@ -29657,7 +29657,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -29673,7 +29673,7 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cn) {
@@ -29687,7 +29687,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -29701,7 +29701,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -29717,7 +29717,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29734,7 +29734,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29750,7 +29750,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29765,7 +29765,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29781,7 +29781,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29799,7 +29799,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29816,7 +29816,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29832,7 +29832,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29850,7 +29850,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29867,7 +29867,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29883,7 +29883,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29901,7 +29901,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29919,7 +29919,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29937,7 +29937,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29955,7 +29955,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29974,7 +29974,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29992,7 +29992,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30010,7 +30010,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30028,7 +30028,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30047,7 +30047,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30068,7 +30068,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30085,7 +30085,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, qmax) {
@@ -30099,7 +30099,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cm) {
@@ -30113,7 +30113,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -30129,7 +30129,7 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cn) {
@@ -30143,7 +30143,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -30157,7 +30157,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -30173,7 +30173,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30190,7 +30190,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30206,7 +30206,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30221,7 +30221,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30237,7 +30237,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30255,7 +30255,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30272,7 +30272,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30288,7 +30288,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30306,7 +30306,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30323,7 +30323,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30339,7 +30339,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30357,7 +30357,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30375,7 +30375,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30393,7 +30393,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30411,7 +30411,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30430,7 +30430,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30448,7 +30448,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30466,7 +30466,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30484,7 +30484,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30503,7 +30503,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30524,7 +30524,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30541,7 +30541,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, qmax) {
@@ -30555,7 +30555,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cm) {
@@ -30569,7 +30569,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -30585,7 +30585,7 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cn) {
@@ -30599,7 +30599,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -30613,7 +30613,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -30629,7 +30629,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30646,7 +30646,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30662,7 +30662,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30677,7 +30677,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30693,7 +30693,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30711,7 +30711,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30728,7 +30728,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30744,7 +30744,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30762,7 +30762,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30779,7 +30779,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30795,7 +30795,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30813,7 +30813,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30831,7 +30831,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30849,7 +30849,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30867,7 +30867,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30886,7 +30886,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30904,7 +30904,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30922,7 +30922,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30940,7 +30940,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30959,7 +30959,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30980,7 +30980,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30997,7 +30997,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, qmax) {
@@ -31011,7 +31011,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cm) {
@@ -31025,7 +31025,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -31041,7 +31041,7 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cn) {
@@ -31055,7 +31055,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -31069,7 +31069,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -31085,7 +31085,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31102,7 +31102,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31118,7 +31118,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31133,7 +31133,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31149,7 +31149,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31167,7 +31167,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31184,7 +31184,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31200,7 +31200,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31218,7 +31218,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31235,7 +31235,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31251,7 +31251,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31269,7 +31269,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31287,7 +31287,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31305,7 +31305,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31323,7 +31323,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31342,7 +31342,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31360,7 +31360,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31378,7 +31378,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31396,7 +31396,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31415,7 +31415,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31436,7 +31436,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31453,7 +31453,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, qmax) {
@@ -31467,7 +31467,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cm) {
@@ -31481,7 +31481,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -31497,7 +31497,7 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cn) {
@@ -31511,7 +31511,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -31525,7 +31525,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -31541,7 +31541,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31558,7 +31558,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31574,7 +31574,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31589,7 +31589,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31605,7 +31605,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31623,7 +31623,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31640,7 +31640,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31656,7 +31656,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31674,7 +31674,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31691,7 +31691,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31707,7 +31707,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31725,7 +31725,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31743,7 +31743,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31761,7 +31761,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31779,7 +31779,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31798,7 +31798,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31816,7 +31816,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31834,7 +31834,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31852,7 +31852,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31871,7 +31871,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31892,7 +31892,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31909,7 +31909,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, qmax) {
@@ -31923,7 +31923,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cm) {
@@ -31937,7 +31937,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -31953,7 +31953,7 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cn) {
@@ -31967,7 +31967,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -31981,7 +31981,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -31997,7 +31997,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32014,7 +32014,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32030,7 +32030,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32045,7 +32045,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32061,7 +32061,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32079,7 +32079,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32096,7 +32096,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32112,7 +32112,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32130,7 +32130,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32147,7 +32147,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32163,7 +32163,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32181,7 +32181,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32199,7 +32199,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32217,7 +32217,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32235,7 +32235,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32254,7 +32254,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32272,7 +32272,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32290,7 +32290,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32308,7 +32308,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32327,7 +32327,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32348,7 +32348,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32365,7 +32365,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, qmax) {
@@ -32379,7 +32379,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cm) {
@@ -32393,7 +32393,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -32409,7 +32409,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cn) {
@@ -32423,7 +32423,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -32437,7 +32437,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -32453,7 +32453,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32470,7 +32470,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32486,7 +32486,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32501,7 +32501,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32517,7 +32517,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32535,7 +32535,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32552,7 +32552,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32568,7 +32568,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32586,7 +32586,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32603,7 +32603,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32619,7 +32619,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32637,7 +32637,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32655,7 +32655,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32673,7 +32673,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32691,7 +32691,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32710,7 +32710,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32728,7 +32728,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32746,7 +32746,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32764,7 +32764,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32783,7 +32783,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32804,7 +32804,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32821,7 +32821,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, qmax) {
@@ -32835,7 +32835,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cm) {
@@ -32849,7 +32849,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -32865,7 +32865,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cn) {
@@ -32879,7 +32879,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -32893,7 +32893,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -32909,7 +32909,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32926,7 +32926,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32942,7 +32942,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32957,7 +32957,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32973,7 +32973,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32991,7 +32991,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33008,7 +33008,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33024,7 +33024,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33042,7 +33042,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33059,7 +33059,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33075,7 +33075,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33093,7 +33093,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33111,7 +33111,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33129,7 +33129,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33147,7 +33147,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33166,7 +33166,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33184,7 +33184,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33202,7 +33202,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33220,7 +33220,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33239,7 +33239,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33260,7 +33260,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33277,7 +33277,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, qmax) {
@@ -33291,7 +33291,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cm) {
@@ -33305,7 +33305,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -33321,7 +33321,7 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cn) {
@@ -33335,7 +33335,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -33349,7 +33349,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -33365,7 +33365,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33382,7 +33382,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33398,7 +33398,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33413,7 +33413,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33429,7 +33429,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33447,7 +33447,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33464,7 +33464,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33480,7 +33480,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33498,7 +33498,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33515,7 +33515,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33531,7 +33531,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33549,7 +33549,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33567,7 +33567,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33585,7 +33585,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33603,7 +33603,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33622,7 +33622,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33640,7 +33640,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33658,7 +33658,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33676,7 +33676,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33695,7 +33695,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33716,7 +33716,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33733,7 +33733,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, qmax) {
@@ -33747,7 +33747,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cm) {
@@ -33761,7 +33761,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -33777,7 +33777,7 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cn) {
@@ -33791,7 +33791,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -33805,7 +33805,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -33821,7 +33821,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33838,7 +33838,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33854,7 +33854,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33869,7 +33869,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33885,7 +33885,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33903,7 +33903,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33920,7 +33920,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33936,7 +33936,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33954,7 +33954,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33971,7 +33971,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33987,7 +33987,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34005,7 +34005,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34023,7 +34023,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34041,7 +34041,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34059,7 +34059,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34078,7 +34078,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34096,7 +34096,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34114,7 +34114,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34132,7 +34132,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34151,7 +34151,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34172,7 +34172,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34189,7 +34189,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, qmax) {
@@ -34203,7 +34203,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cm) {
@@ -34217,7 +34217,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -34233,7 +34233,7 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cn) {
@@ -34247,7 +34247,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -34261,7 +34261,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -34277,7 +34277,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34294,7 +34294,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34310,7 +34310,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34325,7 +34325,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34341,7 +34341,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34359,7 +34359,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34376,7 +34376,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34392,7 +34392,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34410,7 +34410,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34427,7 +34427,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34443,7 +34443,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34461,7 +34461,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34479,7 +34479,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34497,7 +34497,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34515,7 +34515,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34534,7 +34534,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34552,7 +34552,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34570,7 +34570,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34588,7 +34588,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34607,7 +34607,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34628,7 +34628,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34645,7 +34645,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, qmax) {
@@ -34659,7 +34659,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cm) {
@@ -34673,7 +34673,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -34689,7 +34689,7 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cn) {
@@ -34703,7 +34703,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -34717,7 +34717,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -34733,7 +34733,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34750,7 +34750,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34766,7 +34766,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34781,7 +34781,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34797,7 +34797,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34815,7 +34815,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34832,7 +34832,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34848,7 +34848,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34866,7 +34866,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34883,7 +34883,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34899,7 +34899,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34917,7 +34917,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34935,7 +34935,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34953,7 +34953,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34971,7 +34971,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34990,7 +34990,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35008,7 +35008,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35026,7 +35026,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35044,7 +35044,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35063,7 +35063,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35084,7 +35084,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35101,7 +35101,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, qmax) {
@@ -35115,7 +35115,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cm) {
@@ -35129,7 +35129,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -35145,7 +35145,7 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cn) {
@@ -35159,7 +35159,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -35173,7 +35173,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -35189,7 +35189,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35206,7 +35206,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35222,7 +35222,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35237,7 +35237,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35253,7 +35253,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35271,7 +35271,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35288,7 +35288,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35304,7 +35304,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35322,7 +35322,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35339,7 +35339,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35355,7 +35355,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35373,7 +35373,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35391,7 +35391,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35409,7 +35409,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35427,7 +35427,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35446,7 +35446,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35464,7 +35464,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35482,7 +35482,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35500,7 +35500,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35519,7 +35519,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35540,7 +35540,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35557,7 +35557,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, qmax) {
@@ -35571,7 +35571,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cm) {
@@ -35585,7 +35585,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -35601,7 +35601,7 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cn) {
@@ -35615,7 +35615,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -35629,7 +35629,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -35645,7 +35645,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35662,7 +35662,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35678,7 +35678,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35693,7 +35693,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35709,7 +35709,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35727,7 +35727,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35744,7 +35744,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35760,7 +35760,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35778,7 +35778,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35795,7 +35795,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35811,7 +35811,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35829,7 +35829,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35847,7 +35847,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35865,7 +35865,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35883,7 +35883,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35902,7 +35902,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35920,7 +35920,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35938,7 +35938,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35956,7 +35956,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35975,7 +35975,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35996,7 +35996,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36013,7 +36013,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, qmax) {
@@ -36027,7 +36027,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cm) {
@@ -36041,7 +36041,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -36057,7 +36057,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cn) {
@@ -36071,7 +36071,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_strided_a) {
@@ -36085,7 +36085,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_subtile) {
@@ -36101,7 +36101,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36118,7 +36118,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36134,7 +36134,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36149,7 +36149,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36165,7 +36165,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36183,7 +36183,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36200,7 +36200,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36216,7 +36216,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36234,7 +36234,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36251,7 +36251,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36267,7 +36267,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36285,7 +36285,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36303,7 +36303,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36321,7 +36321,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36339,7 +36339,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36358,7 +36358,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36376,7 +36376,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36394,7 +36394,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36412,7 +36412,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36431,7 +36431,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36452,7 +36452,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36469,7 +36469,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, qmax) {
@@ -36483,7 +36483,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cm) {
@@ -36497,7 +36497,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -36513,7 +36513,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cn) {
@@ -36527,7 +36527,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_strided_a) {
@@ -36541,7 +36541,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_subtile) {
@@ -36557,7 +36557,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36574,7 +36574,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36590,7 +36590,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36605,7 +36605,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36621,7 +36621,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36639,7 +36639,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36656,7 +36656,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36672,7 +36672,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36690,7 +36690,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36707,7 +36707,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36723,7 +36723,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36741,7 +36741,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36759,7 +36759,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36777,7 +36777,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36795,7 +36795,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36814,7 +36814,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36832,7 +36832,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36850,7 +36850,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36868,7 +36868,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36887,7 +36887,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36908,7 +36908,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36925,7 +36925,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, qmax) {
@@ -36939,7 +36939,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cm) {
@@ -36953,7 +36953,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -36969,7 +36969,7 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cn) {
@@ -36983,7 +36983,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_strided_a) {
@@ -36997,7 +36997,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_subtile) {
@@ -37013,7 +37013,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37030,7 +37030,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37046,7 +37046,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37061,7 +37061,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37077,7 +37077,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37095,7 +37095,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37112,7 +37112,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37128,7 +37128,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37146,7 +37146,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37163,7 +37163,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37179,7 +37179,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37197,7 +37197,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37215,7 +37215,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37233,7 +37233,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37251,7 +37251,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37270,7 +37270,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37288,7 +37288,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37306,7 +37306,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37324,7 +37324,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37343,7 +37343,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37364,7 +37364,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37381,7 +37381,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, qmax) {
@@ -37395,7 +37395,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cm) {
@@ -37409,7 +37409,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -37425,7 +37425,7 @@
       .m(8)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cn) {
@@ -37439,7 +37439,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_strided_a) {
@@ -37453,7 +37453,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_subtile) {
@@ -37469,7 +37469,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37486,7 +37486,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37502,7 +37502,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37517,7 +37517,7 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37533,7 +37533,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37551,7 +37551,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37568,7 +37568,7 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37584,7 +37584,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37602,7 +37602,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37619,7 +37619,7 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37635,7 +37635,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37653,7 +37653,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37671,7 +37671,7 @@
           .m(8)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37689,7 +37689,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37707,7 +37707,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37726,7 +37726,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37744,7 +37744,7 @@
           .m(8)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37762,7 +37762,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37780,7 +37780,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37799,7 +37799,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37820,7 +37820,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37837,7 +37837,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, qmax) {
@@ -37851,7 +37851,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cm) {
@@ -37865,7 +37865,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -37881,7 +37881,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cn) {
@@ -37895,7 +37895,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_strided_a) {
@@ -37909,7 +37909,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_subtile) {
@@ -37925,7 +37925,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37942,7 +37942,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37958,7 +37958,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37973,7 +37973,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37989,7 +37989,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38007,7 +38007,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38024,7 +38024,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38040,7 +38040,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38058,7 +38058,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38075,7 +38075,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38091,7 +38091,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38109,7 +38109,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38127,7 +38127,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38145,7 +38145,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38163,7 +38163,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38182,7 +38182,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38200,7 +38200,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38218,7 +38218,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38236,7 +38236,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38255,7 +38255,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38276,7 +38276,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38293,7 +38293,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, qmax) {
@@ -38307,7 +38307,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cm) {
@@ -38321,7 +38321,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -38337,7 +38337,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cn) {
@@ -38351,7 +38351,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_strided_a) {
@@ -38365,7 +38365,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_subtile) {
@@ -38381,7 +38381,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38398,7 +38398,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38414,7 +38414,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38429,7 +38429,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38445,7 +38445,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38463,7 +38463,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38480,7 +38480,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38496,7 +38496,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38514,7 +38514,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38531,7 +38531,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38547,7 +38547,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38565,7 +38565,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38583,7 +38583,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38601,7 +38601,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38619,7 +38619,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38638,7 +38638,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38656,7 +38656,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38674,7 +38674,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38692,7 +38692,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38711,7 +38711,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38732,7 +38732,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38749,7 +38749,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, qmax) {
@@ -38763,7 +38763,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cm) {
@@ -38777,7 +38777,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -38793,7 +38793,7 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cn) {
@@ -38807,7 +38807,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_strided_a) {
@@ -38821,7 +38821,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_subtile) {
@@ -38837,7 +38837,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38854,7 +38854,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38870,7 +38870,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38885,7 +38885,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38901,7 +38901,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38919,7 +38919,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38936,7 +38936,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38952,7 +38952,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38970,7 +38970,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38987,7 +38987,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39003,7 +39003,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39021,7 +39021,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39039,7 +39039,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39057,7 +39057,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39075,7 +39075,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39094,7 +39094,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39112,7 +39112,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39130,7 +39130,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39148,7 +39148,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39167,7 +39167,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39188,7 +39188,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39205,7 +39205,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, qmax) {
@@ -39219,7 +39219,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cm) {
@@ -39233,7 +39233,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -39249,7 +39249,7 @@
       .m(8)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cn) {
@@ -39263,7 +39263,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_strided_a) {
@@ -39277,7 +39277,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_subtile) {
@@ -39293,7 +39293,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39310,7 +39310,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39326,7 +39326,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39341,7 +39341,7 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39357,7 +39357,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39375,7 +39375,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39392,7 +39392,7 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39408,7 +39408,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39426,7 +39426,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39443,7 +39443,7 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39459,7 +39459,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39477,7 +39477,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39495,7 +39495,7 @@
           .m(8)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39513,7 +39513,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39531,7 +39531,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39550,7 +39550,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39568,7 +39568,7 @@
           .m(8)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39586,7 +39586,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39604,7 +39604,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39623,7 +39623,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39644,7 +39644,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39661,7 +39661,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, qmax) {
@@ -39675,7 +39675,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cm) {
@@ -39689,7 +39689,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -39705,7 +39705,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cn) {
@@ -39719,7 +39719,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_strided_a) {
@@ -39733,7 +39733,7 @@
       .n(8)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -39749,7 +39749,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39766,7 +39766,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39782,7 +39782,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39797,7 +39797,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39813,7 +39813,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39831,7 +39831,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39848,7 +39848,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39864,7 +39864,7 @@
         .n(8)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39882,7 +39882,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39899,7 +39899,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39915,7 +39915,7 @@
         .n(8)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39933,7 +39933,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39951,7 +39951,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39969,7 +39969,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39987,7 +39987,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40006,7 +40006,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40024,7 +40024,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40042,7 +40042,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40060,7 +40060,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40079,7 +40079,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40100,7 +40100,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40117,7 +40117,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, qmax) {
@@ -40131,7 +40131,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm) {
@@ -40145,7 +40145,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -40161,7 +40161,7 @@
       .m(1)
       .n(16)
       .k(4)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, strided_cn) {
@@ -40175,7 +40175,7 @@
       .n(16)
       .k(4)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) {
@@ -40189,7 +40189,7 @@
       .n(16)
       .k(4)
       .a_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) {
@@ -40205,7 +40205,7 @@
           .n(n)
           .k(4)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40222,7 +40222,7 @@
         .n(16)
         .k(4)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40238,7 +40238,7 @@
         .n(n)
         .k(4)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40253,7 +40253,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40269,7 +40269,7 @@
         .n(16)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40287,7 +40287,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40304,7 +40304,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40320,7 +40320,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40338,7 +40338,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40355,7 +40355,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40371,7 +40371,7 @@
         .n(16)
         .k(k)
         .a_stride(43)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40389,7 +40389,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40407,7 +40407,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40425,7 +40425,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40443,7 +40443,7 @@
           .n(n)
           .k(k)
           .a_stride(23)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40462,7 +40462,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40480,7 +40480,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40498,7 +40498,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40516,7 +40516,7 @@
           .n(n)
           .k(k)
           .a_stride(23)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40535,7 +40535,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40556,7 +40556,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40573,7 +40573,7 @@
       .n(16)
       .k(4)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, qmax) {
@@ -40587,7 +40587,7 @@
       .n(16)
       .k(4)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD32, strided_cm) {
@@ -40601,7 +40601,7 @@
       .n(16)
       .k(4)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -40617,7 +40617,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
@@ -40631,7 +40631,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
@@ -40645,7 +40645,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
@@ -40661,7 +40661,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40678,7 +40678,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40694,7 +40694,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40709,7 +40709,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40725,7 +40725,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40743,7 +40743,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40760,7 +40760,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40776,7 +40776,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40794,7 +40794,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40811,7 +40811,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40827,7 +40827,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40845,7 +40845,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40863,7 +40863,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40881,7 +40881,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40899,7 +40899,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40918,7 +40918,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40936,7 +40936,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40954,7 +40954,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40972,7 +40972,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40991,7 +40991,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41012,7 +41012,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41029,7 +41029,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, qmax) {
@@ -41043,7 +41043,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
@@ -41057,7 +41057,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -41073,7 +41073,7 @@
       .m(4)
       .n(16)
       .k(4)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, strided_cn) {
@@ -41087,7 +41087,7 @@
       .n(16)
       .k(4)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_strided_a) {
@@ -41101,7 +41101,7 @@
       .n(16)
       .k(4)
       .a_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, k_eq_4_subtile) {
@@ -41117,7 +41117,7 @@
           .n(n)
           .k(4)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41134,7 +41134,7 @@
         .n(16)
         .k(4)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41150,7 +41150,7 @@
         .n(n)
         .k(4)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41165,7 +41165,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41181,7 +41181,7 @@
         .n(16)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41199,7 +41199,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41216,7 +41216,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41232,7 +41232,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41250,7 +41250,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41267,7 +41267,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41283,7 +41283,7 @@
         .n(16)
         .k(k)
         .a_stride(43)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41301,7 +41301,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41319,7 +41319,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41337,7 +41337,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41355,7 +41355,7 @@
           .n(n)
           .k(k)
           .a_stride(23)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41374,7 +41374,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41392,7 +41392,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41410,7 +41410,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41428,7 +41428,7 @@
           .n(n)
           .k(k)
           .a_stride(23)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41447,7 +41447,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41468,7 +41468,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41485,7 +41485,7 @@
       .n(16)
       .k(4)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, qmax) {
@@ -41499,7 +41499,7 @@
       .n(16)
       .k(4)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD32, strided_cm) {
@@ -41513,7 +41513,7 @@
       .n(16)
       .k(4)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -41529,7 +41529,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
@@ -41543,7 +41543,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_strided_a) {
@@ -41557,7 +41557,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
@@ -41573,7 +41573,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41590,7 +41590,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41606,7 +41606,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41621,7 +41621,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41637,7 +41637,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41655,7 +41655,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41672,7 +41672,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41688,7 +41688,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41706,7 +41706,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41723,7 +41723,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41739,7 +41739,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41757,7 +41757,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41775,7 +41775,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41793,7 +41793,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41811,7 +41811,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41830,7 +41830,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41848,7 +41848,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41866,7 +41866,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41884,7 +41884,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41903,7 +41903,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41924,7 +41924,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41941,7 +41941,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
@@ -41955,7 +41955,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
@@ -41969,7 +41969,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -41985,7 +41985,7 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
@@ -41999,7 +41999,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_strided_a) {
@@ -42013,7 +42013,7 @@
       .n(16)
       .k(16)
       .a_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
@@ -42029,7 +42029,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42046,7 +42046,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42062,7 +42062,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42077,7 +42077,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42093,7 +42093,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42111,7 +42111,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42128,7 +42128,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42144,7 +42144,7 @@
         .n(16)
         .k(k)
         .a_stride(37)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42162,7 +42162,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42179,7 +42179,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42195,7 +42195,7 @@
         .n(16)
         .k(k)
         .a_stride(163)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42213,7 +42213,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42231,7 +42231,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42249,7 +42249,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42267,7 +42267,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42286,7 +42286,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42304,7 +42304,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42322,7 +42322,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42340,7 +42340,7 @@
           .n(n)
           .k(k)
           .a_stride(83)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42359,7 +42359,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42380,7 +42380,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42397,7 +42397,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
@@ -42411,7 +42411,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
@@ -42425,7 +42425,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -42441,7 +42441,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cn) {
@@ -42455,7 +42455,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_strided_a) {
@@ -42469,7 +42469,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_subtile) {
@@ -42485,7 +42485,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42502,7 +42502,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42518,7 +42518,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42533,7 +42533,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42549,7 +42549,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42567,7 +42567,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42584,7 +42584,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42600,7 +42600,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42618,7 +42618,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42635,7 +42635,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42651,7 +42651,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42669,7 +42669,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42687,7 +42687,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42705,7 +42705,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42723,7 +42723,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42742,7 +42742,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42760,7 +42760,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42778,7 +42778,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42796,7 +42796,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42815,7 +42815,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42836,7 +42836,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42853,7 +42853,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, qmax) {
@@ -42867,7 +42867,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cm) {
@@ -42881,7 +42881,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -42897,7 +42897,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cn) {
@@ -42911,7 +42911,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_strided_a) {
@@ -42925,7 +42925,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_subtile) {
@@ -42941,7 +42941,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42958,7 +42958,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42974,7 +42974,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42989,7 +42989,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43005,7 +43005,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43023,7 +43023,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43040,7 +43040,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43056,7 +43056,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43074,7 +43074,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43091,7 +43091,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43107,7 +43107,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43125,7 +43125,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43143,7 +43143,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43161,7 +43161,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43179,7 +43179,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43198,7 +43198,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43216,7 +43216,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43234,7 +43234,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43252,7 +43252,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43271,7 +43271,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43292,7 +43292,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43309,7 +43309,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, qmax) {
@@ -43323,7 +43323,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cm) {
@@ -43337,7 +43337,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -43353,7 +43353,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cn) {
@@ -43367,7 +43367,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_strided_a) {
@@ -43381,7 +43381,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_subtile) {
@@ -43397,7 +43397,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43414,7 +43414,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43430,7 +43430,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43445,7 +43445,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43461,7 +43461,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43479,7 +43479,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43496,7 +43496,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43512,7 +43512,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43530,7 +43530,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43547,7 +43547,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43563,7 +43563,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43581,7 +43581,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43599,7 +43599,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43617,7 +43617,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43635,7 +43635,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43654,7 +43654,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43672,7 +43672,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43690,7 +43690,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43708,7 +43708,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43727,7 +43727,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43748,7 +43748,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43765,7 +43765,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, qmax) {
@@ -43779,7 +43779,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cm) {
@@ -43793,7 +43793,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -43809,7 +43809,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cn) {
@@ -43823,7 +43823,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_strided_a) {
@@ -43837,7 +43837,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_subtile) {
@@ -43853,7 +43853,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43870,7 +43870,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43886,7 +43886,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43901,7 +43901,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43917,7 +43917,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43935,7 +43935,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43952,7 +43952,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43968,7 +43968,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43986,7 +43986,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44003,7 +44003,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44019,7 +44019,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44037,7 +44037,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44055,7 +44055,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44073,7 +44073,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44091,7 +44091,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44110,7 +44110,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44128,7 +44128,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44146,7 +44146,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44164,7 +44164,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44183,7 +44183,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44204,7 +44204,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44221,7 +44221,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, qmax) {
@@ -44235,7 +44235,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cm) {
@@ -44249,7 +44249,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -44265,7 +44265,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cn) {
@@ -44279,7 +44279,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_strided_a) {
@@ -44293,7 +44293,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_subtile) {
@@ -44309,7 +44309,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44326,7 +44326,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44342,7 +44342,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44357,7 +44357,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44373,7 +44373,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44391,7 +44391,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44408,7 +44408,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44424,7 +44424,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44442,7 +44442,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44459,7 +44459,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44475,7 +44475,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44493,7 +44493,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44511,7 +44511,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44529,7 +44529,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44547,7 +44547,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44566,7 +44566,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44584,7 +44584,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44602,7 +44602,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44620,7 +44620,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44639,7 +44639,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44660,7 +44660,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44677,7 +44677,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, qmax) {
@@ -44691,7 +44691,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cm) {
@@ -44705,7 +44705,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -44721,7 +44721,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cn) {
@@ -44735,7 +44735,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_strided_a) {
@@ -44749,7 +44749,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_subtile) {
@@ -44765,7 +44765,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44782,7 +44782,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44798,7 +44798,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44813,7 +44813,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44829,7 +44829,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44847,7 +44847,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44864,7 +44864,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44880,7 +44880,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44898,7 +44898,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44915,7 +44915,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44931,7 +44931,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44949,7 +44949,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44967,7 +44967,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44985,7 +44985,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45003,7 +45003,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45022,7 +45022,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45040,7 +45040,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45058,7 +45058,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45076,7 +45076,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45095,7 +45095,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45116,7 +45116,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45133,7 +45133,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, qmax) {
@@ -45147,7 +45147,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cm) {
@@ -45161,7 +45161,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -45177,7 +45177,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cn) {
@@ -45191,7 +45191,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_strided_a) {
@@ -45205,7 +45205,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_subtile) {
@@ -45221,7 +45221,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45238,7 +45238,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45254,7 +45254,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45269,7 +45269,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45285,7 +45285,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45303,7 +45303,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45320,7 +45320,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45336,7 +45336,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45354,7 +45354,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45371,7 +45371,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45387,7 +45387,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45405,7 +45405,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45423,7 +45423,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45441,7 +45441,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45459,7 +45459,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45478,7 +45478,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45496,7 +45496,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45514,7 +45514,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45532,7 +45532,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45551,7 +45551,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45572,7 +45572,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45589,7 +45589,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, qmax) {
@@ -45603,7 +45603,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cm) {
@@ -45617,7 +45617,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -45633,7 +45633,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cn) {
@@ -45647,7 +45647,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_strided_a) {
@@ -45661,7 +45661,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_subtile) {
@@ -45677,7 +45677,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45694,7 +45694,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45710,7 +45710,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45725,7 +45725,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45741,7 +45741,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45759,7 +45759,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45776,7 +45776,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45792,7 +45792,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45810,7 +45810,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45827,7 +45827,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45843,7 +45843,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45861,7 +45861,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45879,7 +45879,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45897,7 +45897,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45915,7 +45915,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45934,7 +45934,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45952,7 +45952,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45970,7 +45970,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45988,7 +45988,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46007,7 +46007,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46028,7 +46028,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46045,7 +46045,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, qmax) {
@@ -46059,7 +46059,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cm) {
@@ -46073,7 +46073,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -46089,7 +46089,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cn) {
@@ -46103,7 +46103,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_strided_a) {
@@ -46117,7 +46117,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_subtile) {
@@ -46133,7 +46133,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46150,7 +46150,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46166,7 +46166,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46181,7 +46181,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46197,7 +46197,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46215,7 +46215,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46232,7 +46232,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46248,7 +46248,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46266,7 +46266,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46283,7 +46283,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46299,7 +46299,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46317,7 +46317,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46335,7 +46335,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46353,7 +46353,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46371,7 +46371,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46390,7 +46390,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46408,7 +46408,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46426,7 +46426,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46444,7 +46444,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46463,7 +46463,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46484,7 +46484,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46501,7 +46501,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, qmax) {
@@ -46515,7 +46515,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cm) {
@@ -46529,7 +46529,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -46545,7 +46545,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cn) {
@@ -46559,7 +46559,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_strided_a) {
@@ -46573,7 +46573,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_subtile) {
@@ -46589,7 +46589,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46606,7 +46606,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46622,7 +46622,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46637,7 +46637,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46653,7 +46653,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46671,7 +46671,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46688,7 +46688,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46704,7 +46704,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46722,7 +46722,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46739,7 +46739,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46755,7 +46755,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46773,7 +46773,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46791,7 +46791,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46809,7 +46809,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46827,7 +46827,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46846,7 +46846,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46864,7 +46864,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46882,7 +46882,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46900,7 +46900,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46919,7 +46919,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46940,7 +46940,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46957,7 +46957,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, qmax) {
@@ -46971,7 +46971,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cm) {
@@ -46985,7 +46985,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -47001,7 +47001,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cn) {
@@ -47015,7 +47015,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_strided_a) {
@@ -47029,7 +47029,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_subtile) {
@@ -47045,7 +47045,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47062,7 +47062,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47078,7 +47078,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47093,7 +47093,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47109,7 +47109,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47127,7 +47127,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47144,7 +47144,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47160,7 +47160,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47178,7 +47178,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47195,7 +47195,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47211,7 +47211,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47229,7 +47229,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47247,7 +47247,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47265,7 +47265,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47283,7 +47283,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47302,7 +47302,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47320,7 +47320,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47338,7 +47338,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47356,7 +47356,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47375,7 +47375,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47396,7 +47396,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47413,7 +47413,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, qmax) {
@@ -47427,7 +47427,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cm) {
@@ -47441,7 +47441,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -47457,7 +47457,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cn) {
@@ -47471,7 +47471,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_strided_a) {
@@ -47485,7 +47485,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_subtile) {
@@ -47501,7 +47501,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47518,7 +47518,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47534,7 +47534,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47549,7 +47549,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47565,7 +47565,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47583,7 +47583,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47600,7 +47600,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47616,7 +47616,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47634,7 +47634,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47651,7 +47651,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47667,7 +47667,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47685,7 +47685,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47703,7 +47703,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47721,7 +47721,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47739,7 +47739,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47758,7 +47758,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47776,7 +47776,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47794,7 +47794,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47812,7 +47812,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47831,7 +47831,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47852,7 +47852,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47869,7 +47869,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, qmax) {
@@ -47883,7 +47883,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cm) {
@@ -47897,7 +47897,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -47913,7 +47913,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, strided_cn) {
@@ -47927,7 +47927,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_eq_8_strided_a) {
@@ -47941,7 +47941,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, k_eq_8_subtile) {
@@ -47957,7 +47957,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47974,7 +47974,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47990,7 +47990,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48005,7 +48005,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48021,7 +48021,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48039,7 +48039,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48056,7 +48056,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48072,7 +48072,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48090,7 +48090,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48107,7 +48107,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48123,7 +48123,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48141,7 +48141,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48159,7 +48159,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48177,7 +48177,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48195,7 +48195,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48214,7 +48214,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48232,7 +48232,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48250,7 +48250,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48268,7 +48268,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48287,7 +48287,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48308,7 +48308,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48325,7 +48325,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, qmax) {
@@ -48339,7 +48339,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD64, strided_cm) {
@@ -48353,7 +48353,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -48369,7 +48369,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, strided_cn) {
@@ -48383,7 +48383,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_eq_8_strided_a) {
@@ -48397,7 +48397,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, k_eq_8_subtile) {
@@ -48413,7 +48413,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48430,7 +48430,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48446,7 +48446,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48461,7 +48461,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48477,7 +48477,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48495,7 +48495,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48512,7 +48512,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48528,7 +48528,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48546,7 +48546,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48563,7 +48563,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48579,7 +48579,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48597,7 +48597,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48615,7 +48615,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48633,7 +48633,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48651,7 +48651,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48670,7 +48670,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48688,7 +48688,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48706,7 +48706,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48724,7 +48724,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48743,7 +48743,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48764,7 +48764,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48781,7 +48781,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, qmax) {
@@ -48795,7 +48795,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD64, strided_cm) {
@@ -48809,7 +48809,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -48825,7 +48825,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, strided_cn) {
@@ -48839,7 +48839,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_eq_8_strided_a) {
@@ -48853,7 +48853,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, k_eq_8_subtile) {
@@ -48869,7 +48869,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48886,7 +48886,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48902,7 +48902,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48917,7 +48917,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48933,7 +48933,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48951,7 +48951,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48968,7 +48968,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48984,7 +48984,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49002,7 +49002,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49019,7 +49019,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49035,7 +49035,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49053,7 +49053,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49071,7 +49071,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49089,7 +49089,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49107,7 +49107,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49126,7 +49126,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49144,7 +49144,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49162,7 +49162,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49180,7 +49180,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49199,7 +49199,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49220,7 +49220,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49237,7 +49237,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, qmax) {
@@ -49251,7 +49251,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD64, strided_cm) {
@@ -49265,7 +49265,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -49281,7 +49281,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, strided_cn) {
@@ -49295,7 +49295,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_eq_8_strided_a) {
@@ -49309,7 +49309,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, k_eq_8_subtile) {
@@ -49325,7 +49325,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49342,7 +49342,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49358,7 +49358,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49373,7 +49373,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49389,7 +49389,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49407,7 +49407,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49424,7 +49424,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49440,7 +49440,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49458,7 +49458,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49475,7 +49475,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49491,7 +49491,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49509,7 +49509,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49527,7 +49527,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49545,7 +49545,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49563,7 +49563,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49582,7 +49582,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49600,7 +49600,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49618,7 +49618,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49636,7 +49636,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49655,7 +49655,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49676,7 +49676,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49693,7 +49693,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, qmax) {
@@ -49707,7 +49707,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD64, strided_cm) {
@@ -49721,7 +49721,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -49737,7 +49737,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cn) {
@@ -49751,7 +49751,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_strided_a) {
@@ -49765,7 +49765,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_subtile) {
@@ -49781,7 +49781,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49798,7 +49798,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49814,7 +49814,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49829,7 +49829,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49845,7 +49845,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49863,7 +49863,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49880,7 +49880,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49896,7 +49896,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49914,7 +49914,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49931,7 +49931,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49947,7 +49947,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49965,7 +49965,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49983,7 +49983,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50001,7 +50001,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50019,7 +50019,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50038,7 +50038,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50056,7 +50056,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50074,7 +50074,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50092,7 +50092,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50111,7 +50111,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50132,7 +50132,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50149,7 +50149,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, qmax) {
@@ -50163,7 +50163,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cm) {
@@ -50177,7 +50177,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -50193,7 +50193,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cn) {
@@ -50207,7 +50207,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_strided_a) {
@@ -50221,7 +50221,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_subtile) {
@@ -50237,7 +50237,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50254,7 +50254,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50270,7 +50270,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50285,7 +50285,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50301,7 +50301,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50319,7 +50319,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50336,7 +50336,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50352,7 +50352,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50370,7 +50370,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50387,7 +50387,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50403,7 +50403,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50421,7 +50421,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50439,7 +50439,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50457,7 +50457,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50475,7 +50475,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50494,7 +50494,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50512,7 +50512,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50530,7 +50530,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50548,7 +50548,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50567,7 +50567,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50588,7 +50588,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50605,7 +50605,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, qmax) {
@@ -50619,7 +50619,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cm) {
@@ -50633,7 +50633,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -50649,7 +50649,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cn) {
@@ -50663,7 +50663,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_strided_a) {
@@ -50677,7 +50677,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_subtile) {
@@ -50693,7 +50693,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50710,7 +50710,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50726,7 +50726,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50741,7 +50741,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50757,7 +50757,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50775,7 +50775,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50792,7 +50792,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50808,7 +50808,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50826,7 +50826,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50843,7 +50843,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50859,7 +50859,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50877,7 +50877,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50895,7 +50895,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50913,7 +50913,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50931,7 +50931,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50950,7 +50950,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50968,7 +50968,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50986,7 +50986,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51004,7 +51004,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51023,7 +51023,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51044,7 +51044,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51061,7 +51061,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, qmax) {
@@ -51075,7 +51075,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cm) {
@@ -51089,7 +51089,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -51105,7 +51105,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cn) {
@@ -51119,7 +51119,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_strided_a) {
@@ -51133,7 +51133,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_subtile) {
@@ -51149,7 +51149,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51166,7 +51166,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51182,7 +51182,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51197,7 +51197,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51213,7 +51213,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51231,7 +51231,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51248,7 +51248,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51264,7 +51264,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51282,7 +51282,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51299,7 +51299,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51315,7 +51315,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51333,7 +51333,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51351,7 +51351,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51369,7 +51369,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51387,7 +51387,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51406,7 +51406,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51424,7 +51424,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51442,7 +51442,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51460,7 +51460,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51479,7 +51479,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51500,7 +51500,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51517,7 +51517,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, qmax) {
@@ -51531,7 +51531,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cm) {
@@ -51545,7 +51545,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -51561,7 +51561,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cn) {
@@ -51575,7 +51575,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_strided_a) {
@@ -51589,7 +51589,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_subtile) {
@@ -51605,7 +51605,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51622,7 +51622,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51638,7 +51638,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51653,7 +51653,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51669,7 +51669,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51687,7 +51687,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51704,7 +51704,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51720,7 +51720,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51738,7 +51738,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51755,7 +51755,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51771,7 +51771,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51789,7 +51789,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51807,7 +51807,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51825,7 +51825,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51843,7 +51843,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51862,7 +51862,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51880,7 +51880,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51898,7 +51898,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51916,7 +51916,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51935,7 +51935,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51956,7 +51956,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51973,7 +51973,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, qmax) {
@@ -51987,7 +51987,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cm) {
@@ -52001,7 +52001,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -52017,7 +52017,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cn) {
@@ -52031,7 +52031,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_strided_a) {
@@ -52045,7 +52045,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_subtile) {
@@ -52061,7 +52061,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52078,7 +52078,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52094,7 +52094,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52109,7 +52109,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52125,7 +52125,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52143,7 +52143,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52160,7 +52160,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52176,7 +52176,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52194,7 +52194,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52211,7 +52211,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52227,7 +52227,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52245,7 +52245,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52263,7 +52263,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52281,7 +52281,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52299,7 +52299,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52318,7 +52318,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52336,7 +52336,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52354,7 +52354,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52372,7 +52372,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52391,7 +52391,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52412,7 +52412,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52429,7 +52429,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, qmax) {
@@ -52443,7 +52443,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cm) {
@@ -52457,7 +52457,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -52473,7 +52473,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cn) {
@@ -52487,7 +52487,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_strided_a) {
@@ -52501,7 +52501,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_subtile) {
@@ -52517,7 +52517,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52534,7 +52534,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52550,7 +52550,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52565,7 +52565,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52581,7 +52581,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52599,7 +52599,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52616,7 +52616,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52632,7 +52632,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52650,7 +52650,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52667,7 +52667,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52683,7 +52683,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52701,7 +52701,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52719,7 +52719,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52737,7 +52737,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52755,7 +52755,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52774,7 +52774,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52792,7 +52792,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52810,7 +52810,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52828,7 +52828,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52847,7 +52847,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52868,7 +52868,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52885,7 +52885,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, qmax) {
@@ -52899,7 +52899,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cm) {
@@ -52913,7 +52913,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -52929,7 +52929,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cn) {
@@ -52943,7 +52943,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_strided_a) {
@@ -52957,7 +52957,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_subtile) {
@@ -52973,7 +52973,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52990,7 +52990,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53006,7 +53006,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53021,7 +53021,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53037,7 +53037,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53055,7 +53055,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53072,7 +53072,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53088,7 +53088,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53106,7 +53106,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53123,7 +53123,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53139,7 +53139,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53157,7 +53157,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53175,7 +53175,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53193,7 +53193,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53211,7 +53211,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53230,7 +53230,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53248,7 +53248,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53266,7 +53266,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53284,7 +53284,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53303,7 +53303,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53324,7 +53324,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53341,7 +53341,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, qmax) {
@@ -53355,7 +53355,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cm) {
@@ -53369,7 +53369,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -53385,7 +53385,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cn) {
@@ -53399,7 +53399,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_strided_a) {
@@ -53413,7 +53413,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_subtile) {
@@ -53429,7 +53429,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53446,7 +53446,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53462,7 +53462,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53477,7 +53477,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53493,7 +53493,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53511,7 +53511,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53528,7 +53528,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53544,7 +53544,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53562,7 +53562,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53579,7 +53579,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53595,7 +53595,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53613,7 +53613,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53631,7 +53631,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53649,7 +53649,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53667,7 +53667,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53686,7 +53686,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53704,7 +53704,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53722,7 +53722,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53740,7 +53740,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53759,7 +53759,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53780,7 +53780,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53797,7 +53797,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, qmax) {
@@ -53811,7 +53811,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cm) {
@@ -53825,7 +53825,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -53841,7 +53841,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cn) {
@@ -53855,7 +53855,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_strided_a) {
@@ -53869,7 +53869,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_subtile) {
@@ -53885,7 +53885,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53902,7 +53902,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53918,7 +53918,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53933,7 +53933,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53949,7 +53949,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53967,7 +53967,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53984,7 +53984,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54000,7 +54000,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54018,7 +54018,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54035,7 +54035,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54051,7 +54051,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54069,7 +54069,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54087,7 +54087,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54105,7 +54105,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54123,7 +54123,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54142,7 +54142,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54160,7 +54160,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54178,7 +54178,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54196,7 +54196,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54215,7 +54215,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54236,7 +54236,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54253,7 +54253,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, qmax) {
@@ -54267,7 +54267,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cm) {
@@ -54281,7 +54281,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -54297,7 +54297,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cn) {
@@ -54311,7 +54311,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_strided_a) {
@@ -54325,7 +54325,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_subtile) {
@@ -54341,7 +54341,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54358,7 +54358,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54374,7 +54374,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54389,7 +54389,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54405,7 +54405,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54423,7 +54423,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54440,7 +54440,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54456,7 +54456,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54474,7 +54474,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54491,7 +54491,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54507,7 +54507,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54525,7 +54525,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54543,7 +54543,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54561,7 +54561,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54579,7 +54579,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54598,7 +54598,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54616,7 +54616,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54634,7 +54634,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54652,7 +54652,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54671,7 +54671,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54692,7 +54692,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54709,7 +54709,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, qmax) {
@@ -54723,7 +54723,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cm) {
@@ -54737,7 +54737,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -54753,7 +54753,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cn) {
@@ -54767,7 +54767,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_strided_a) {
@@ -54781,7 +54781,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_subtile) {
@@ -54797,7 +54797,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54814,7 +54814,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54830,7 +54830,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54845,7 +54845,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54861,7 +54861,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54879,7 +54879,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54896,7 +54896,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54912,7 +54912,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54930,7 +54930,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54947,7 +54947,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54963,7 +54963,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54981,7 +54981,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54999,7 +54999,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55017,7 +55017,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55035,7 +55035,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55054,7 +55054,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55072,7 +55072,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55090,7 +55090,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55108,7 +55108,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55127,7 +55127,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55148,7 +55148,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55165,7 +55165,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, qmax) {
@@ -55179,7 +55179,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cm) {
@@ -55193,7 +55193,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -55209,7 +55209,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cn) {
@@ -55223,7 +55223,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_strided_a) {
@@ -55237,7 +55237,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_subtile) {
@@ -55253,7 +55253,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55270,7 +55270,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55286,7 +55286,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55301,7 +55301,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55317,7 +55317,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55335,7 +55335,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55352,7 +55352,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55368,7 +55368,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55386,7 +55386,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55403,7 +55403,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55419,7 +55419,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55437,7 +55437,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55455,7 +55455,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55473,7 +55473,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55491,7 +55491,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55510,7 +55510,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55528,7 +55528,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55546,7 +55546,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55564,7 +55564,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55583,7 +55583,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55604,7 +55604,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55621,7 +55621,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, qmax) {
@@ -55635,7 +55635,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cm) {
@@ -55649,7 +55649,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -55665,7 +55665,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cn) {
@@ -55679,7 +55679,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_strided_a) {
@@ -55693,7 +55693,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_subtile) {
@@ -55709,7 +55709,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55726,7 +55726,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55742,7 +55742,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55757,7 +55757,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55773,7 +55773,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55791,7 +55791,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55808,7 +55808,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55824,7 +55824,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55842,7 +55842,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55859,7 +55859,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55875,7 +55875,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55893,7 +55893,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55911,7 +55911,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55929,7 +55929,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55947,7 +55947,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55966,7 +55966,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55984,7 +55984,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56002,7 +56002,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56020,7 +56020,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56039,7 +56039,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56060,7 +56060,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56077,7 +56077,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, qmax) {
@@ -56091,7 +56091,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cm) {
@@ -56105,7 +56105,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -56121,7 +56121,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cn) {
@@ -56135,7 +56135,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_strided_a) {
@@ -56149,7 +56149,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_subtile) {
@@ -56165,7 +56165,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56182,7 +56182,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56198,7 +56198,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56213,7 +56213,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56229,7 +56229,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56247,7 +56247,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56264,7 +56264,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56280,7 +56280,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56298,7 +56298,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56315,7 +56315,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56331,7 +56331,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56349,7 +56349,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56367,7 +56367,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56385,7 +56385,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56403,7 +56403,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56422,7 +56422,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56440,7 +56440,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56458,7 +56458,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56476,7 +56476,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56495,7 +56495,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56516,7 +56516,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56533,7 +56533,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, qmax) {
@@ -56547,7 +56547,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cm) {
@@ -56561,7 +56561,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -56577,7 +56577,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cn) {
@@ -56591,7 +56591,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_strided_a) {
@@ -56605,7 +56605,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_subtile) {
@@ -56621,7 +56621,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56638,7 +56638,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56654,7 +56654,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56669,7 +56669,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56685,7 +56685,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56703,7 +56703,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56720,7 +56720,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56736,7 +56736,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56754,7 +56754,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56771,7 +56771,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56787,7 +56787,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56805,7 +56805,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56823,7 +56823,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56841,7 +56841,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56859,7 +56859,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56878,7 +56878,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56896,7 +56896,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56914,7 +56914,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56932,7 +56932,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56951,7 +56951,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56972,7 +56972,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56989,7 +56989,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, qmax) {
@@ -57003,7 +57003,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cm) {
@@ -57017,7 +57017,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -57033,7 +57033,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cn) {
@@ -57047,7 +57047,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_strided_a) {
@@ -57061,7 +57061,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_subtile) {
@@ -57077,7 +57077,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57094,7 +57094,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57110,7 +57110,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57125,7 +57125,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57141,7 +57141,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57159,7 +57159,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57176,7 +57176,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57192,7 +57192,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57210,7 +57210,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57227,7 +57227,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57243,7 +57243,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57261,7 +57261,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57279,7 +57279,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57297,7 +57297,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57315,7 +57315,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57334,7 +57334,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57352,7 +57352,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57370,7 +57370,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57388,7 +57388,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57407,7 +57407,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57428,7 +57428,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57445,7 +57445,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, qmax) {
@@ -57459,7 +57459,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cm) {
@@ -57473,7 +57473,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -57489,7 +57489,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cn) {
@@ -57503,7 +57503,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_strided_a) {
@@ -57517,7 +57517,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_subtile) {
@@ -57533,7 +57533,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57550,7 +57550,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57566,7 +57566,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57581,7 +57581,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57597,7 +57597,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57615,7 +57615,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57632,7 +57632,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57648,7 +57648,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57666,7 +57666,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57683,7 +57683,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57699,7 +57699,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57717,7 +57717,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57735,7 +57735,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57753,7 +57753,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57771,7 +57771,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57790,7 +57790,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57808,7 +57808,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57826,7 +57826,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57844,7 +57844,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57863,7 +57863,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57884,7 +57884,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57901,7 +57901,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, qmax) {
@@ -57915,7 +57915,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cm) {
@@ -57929,7 +57929,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -57945,7 +57945,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cn) {
@@ -57959,7 +57959,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_strided_a) {
@@ -57973,7 +57973,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_subtile) {
@@ -57989,7 +57989,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58006,7 +58006,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58022,7 +58022,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58037,7 +58037,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58053,7 +58053,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58071,7 +58071,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58088,7 +58088,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58104,7 +58104,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58122,7 +58122,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58139,7 +58139,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58155,7 +58155,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58173,7 +58173,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58191,7 +58191,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58209,7 +58209,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58227,7 +58227,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58246,7 +58246,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58264,7 +58264,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58282,7 +58282,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58300,7 +58300,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58319,7 +58319,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58340,7 +58340,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58357,7 +58357,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, qmax) {
@@ -58371,7 +58371,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cm) {
@@ -58385,7 +58385,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -58401,7 +58401,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cn) {
@@ -58415,7 +58415,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_strided_a) {
@@ -58429,7 +58429,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_subtile) {
@@ -58445,7 +58445,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58462,7 +58462,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58478,7 +58478,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58493,7 +58493,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58509,7 +58509,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58527,7 +58527,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58544,7 +58544,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58560,7 +58560,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58578,7 +58578,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58595,7 +58595,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58611,7 +58611,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58629,7 +58629,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58647,7 +58647,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58665,7 +58665,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58683,7 +58683,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58702,7 +58702,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58720,7 +58720,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58738,7 +58738,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58756,7 +58756,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58775,7 +58775,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58796,7 +58796,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58813,7 +58813,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, qmax) {
@@ -58827,7 +58827,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cm) {
@@ -58841,7 +58841,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -58857,7 +58857,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cn) {
@@ -58871,7 +58871,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_strided_a) {
@@ -58885,7 +58885,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_subtile) {
@@ -58901,7 +58901,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58918,7 +58918,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58934,7 +58934,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58949,7 +58949,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58965,7 +58965,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58983,7 +58983,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59000,7 +59000,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59016,7 +59016,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59034,7 +59034,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59051,7 +59051,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59067,7 +59067,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59085,7 +59085,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59103,7 +59103,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59121,7 +59121,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59139,7 +59139,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59158,7 +59158,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59176,7 +59176,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59194,7 +59194,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59212,7 +59212,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59231,7 +59231,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59252,7 +59252,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59269,7 +59269,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, qmax) {
@@ -59283,7 +59283,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cm) {
@@ -59297,7 +59297,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -59313,7 +59313,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cn) {
@@ -59327,7 +59327,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_strided_a) {
@@ -59341,7 +59341,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_subtile) {
@@ -59357,7 +59357,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59374,7 +59374,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59390,7 +59390,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59405,7 +59405,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59421,7 +59421,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59439,7 +59439,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59456,7 +59456,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59472,7 +59472,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59490,7 +59490,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59507,7 +59507,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59523,7 +59523,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59541,7 +59541,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59559,7 +59559,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59577,7 +59577,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59595,7 +59595,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59614,7 +59614,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59632,7 +59632,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59650,7 +59650,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59668,7 +59668,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59687,7 +59687,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59708,7 +59708,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59725,7 +59725,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, qmax) {
@@ -59739,7 +59739,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cm) {
@@ -59753,7 +59753,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -59769,7 +59769,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cn) {
@@ -59783,7 +59783,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_strided_a) {
@@ -59797,7 +59797,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_subtile) {
@@ -59813,7 +59813,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59830,7 +59830,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59846,7 +59846,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59861,7 +59861,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59877,7 +59877,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59895,7 +59895,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59912,7 +59912,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59928,7 +59928,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59946,7 +59946,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59963,7 +59963,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59979,7 +59979,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59997,7 +59997,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60015,7 +60015,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60033,7 +60033,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60051,7 +60051,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60070,7 +60070,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60088,7 +60088,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60106,7 +60106,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60124,7 +60124,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60143,7 +60143,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60164,7 +60164,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60181,7 +60181,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, qmax) {
@@ -60195,7 +60195,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cm) {
@@ -60209,7 +60209,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -60225,7 +60225,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cn) {
@@ -60239,7 +60239,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_strided_a) {
@@ -60253,7 +60253,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_subtile) {
@@ -60269,7 +60269,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60286,7 +60286,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60302,7 +60302,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60317,7 +60317,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60333,7 +60333,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60351,7 +60351,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60368,7 +60368,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60384,7 +60384,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60402,7 +60402,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60419,7 +60419,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60435,7 +60435,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60453,7 +60453,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60471,7 +60471,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60489,7 +60489,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60507,7 +60507,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60526,7 +60526,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60544,7 +60544,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60562,7 +60562,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60580,7 +60580,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60599,7 +60599,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60620,7 +60620,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60637,7 +60637,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, qmax) {
@@ -60651,7 +60651,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cm) {
@@ -60665,7 +60665,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -60682,7 +60682,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, strided_cn) {
@@ -60697,7 +60697,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_eq_8_strided_a) {
@@ -60712,7 +60712,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE2, k_eq_8_subtile) {
@@ -60729,7 +60729,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60747,7 +60747,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60764,7 +60764,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60780,7 +60780,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60797,7 +60797,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60816,7 +60816,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60834,7 +60834,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60851,7 +60851,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60870,7 +60870,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60888,7 +60888,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60905,7 +60905,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60924,7 +60924,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60943,7 +60943,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60962,7 +60962,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60981,7 +60981,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61001,7 +61001,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61020,7 +61020,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61039,7 +61039,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61058,7 +61058,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61078,7 +61078,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61100,7 +61100,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61118,7 +61118,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -61135,7 +61135,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, strided_cn) {
@@ -61150,7 +61150,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_eq_8_strided_a) {
@@ -61165,7 +61165,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE2, k_eq_8_subtile) {
@@ -61182,7 +61182,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61200,7 +61200,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61217,7 +61217,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61233,7 +61233,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61250,7 +61250,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61269,7 +61269,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61287,7 +61287,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61304,7 +61304,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61323,7 +61323,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61341,7 +61341,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61358,7 +61358,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61377,7 +61377,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61396,7 +61396,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61415,7 +61415,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61434,7 +61434,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61454,7 +61454,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61473,7 +61473,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61492,7 +61492,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61511,7 +61511,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61531,7 +61531,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61553,7 +61553,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61571,7 +61571,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -61588,7 +61588,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, strided_cn) {
@@ -61603,7 +61603,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_eq_8_strided_a) {
@@ -61618,7 +61618,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSSE3, k_eq_8_subtile) {
@@ -61635,7 +61635,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61653,7 +61653,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61670,7 +61670,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61686,7 +61686,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61703,7 +61703,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61722,7 +61722,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61740,7 +61740,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61757,7 +61757,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61776,7 +61776,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61794,7 +61794,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61811,7 +61811,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61830,7 +61830,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61849,7 +61849,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61868,7 +61868,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61887,7 +61887,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61907,7 +61907,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61926,7 +61926,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61945,7 +61945,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61964,7 +61964,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61984,7 +61984,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62006,7 +62006,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62024,7 +62024,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -62041,7 +62041,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, strided_cn) {
@@ -62056,7 +62056,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_eq_8_strided_a) {
@@ -62071,7 +62071,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSSE3, k_eq_8_subtile) {
@@ -62088,7 +62088,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62106,7 +62106,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62123,7 +62123,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62139,7 +62139,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62156,7 +62156,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62175,7 +62175,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62193,7 +62193,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62210,7 +62210,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62229,7 +62229,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62247,7 +62247,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62264,7 +62264,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62283,7 +62283,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62302,7 +62302,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62321,7 +62321,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62340,7 +62340,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62360,7 +62360,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62379,7 +62379,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62398,7 +62398,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62417,7 +62417,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62437,7 +62437,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62459,7 +62459,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62477,7 +62477,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -62494,7 +62494,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, strided_cn) {
@@ -62509,7 +62509,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_eq_8_strided_a) {
@@ -62524,7 +62524,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__SSE41, k_eq_8_subtile) {
@@ -62541,7 +62541,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62559,7 +62559,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62576,7 +62576,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62592,7 +62592,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62609,7 +62609,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62628,7 +62628,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62646,7 +62646,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62663,7 +62663,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62682,7 +62682,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62700,7 +62700,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62717,7 +62717,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62736,7 +62736,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62755,7 +62755,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62774,7 +62774,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62793,7 +62793,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62813,7 +62813,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62832,7 +62832,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62851,7 +62851,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62870,7 +62870,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62890,7 +62890,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62912,7 +62912,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62930,7 +62930,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -62947,7 +62947,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, strided_cn) {
@@ -62962,7 +62962,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_eq_8_strided_a) {
@@ -62977,7 +62977,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__SSE41, k_eq_8_subtile) {
@@ -62994,7 +62994,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63012,7 +63012,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63029,7 +63029,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63045,7 +63045,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63062,7 +63062,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63081,7 +63081,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63099,7 +63099,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63116,7 +63116,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63135,7 +63135,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63153,7 +63153,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63170,7 +63170,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63189,7 +63189,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63208,7 +63208,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63227,7 +63227,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63246,7 +63246,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63266,7 +63266,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63285,7 +63285,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63304,7 +63304,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63323,7 +63323,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63343,7 +63343,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63365,7 +63365,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63383,7 +63383,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -63400,7 +63400,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, strided_cn) {
@@ -63415,7 +63415,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_eq_8_strided_a) {
@@ -63430,7 +63430,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__AVX, k_eq_8_subtile) {
@@ -63447,7 +63447,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63465,7 +63465,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63482,7 +63482,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63498,7 +63498,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63515,7 +63515,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63534,7 +63534,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63552,7 +63552,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63569,7 +63569,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63588,7 +63588,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63606,7 +63606,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63623,7 +63623,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63642,7 +63642,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63661,7 +63661,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63680,7 +63680,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63699,7 +63699,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63719,7 +63719,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63738,7 +63738,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63757,7 +63757,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63776,7 +63776,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63796,7 +63796,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63818,7 +63818,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63836,7 +63836,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -63853,7 +63853,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, strided_cn) {
@@ -63868,7 +63868,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_eq_8_strided_a) {
@@ -63883,7 +63883,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__AVX, k_eq_8_subtile) {
@@ -63900,7 +63900,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63918,7 +63918,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63935,7 +63935,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63951,7 +63951,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63968,7 +63968,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63987,7 +63987,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64005,7 +64005,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64022,7 +64022,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64041,7 +64041,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64059,7 +64059,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64076,7 +64076,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64095,7 +64095,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64114,7 +64114,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64133,7 +64133,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64152,7 +64152,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64172,7 +64172,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64191,7 +64191,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64210,7 +64210,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64229,7 +64229,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64249,7 +64249,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64271,7 +64271,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64289,7 +64289,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -64306,7 +64306,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, strided_cn) {
@@ -64321,7 +64321,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_eq_8_strided_a) {
@@ -64336,7 +64336,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C2__XOP, k_eq_8_subtile) {
@@ -64353,7 +64353,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64371,7 +64371,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64388,7 +64388,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64404,7 +64404,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64421,7 +64421,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64440,7 +64440,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64458,7 +64458,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64475,7 +64475,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64494,7 +64494,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64512,7 +64512,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64529,7 +64529,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64548,7 +64548,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64567,7 +64567,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64586,7 +64586,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64605,7 +64605,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64625,7 +64625,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64644,7 +64644,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64663,7 +64663,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64682,7 +64682,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64702,7 +64702,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64724,7 +64724,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64742,7 +64742,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -64759,7 +64759,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, strided_cn) {
@@ -64774,7 +64774,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_eq_8_strided_a) {
@@ -64789,7 +64789,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_4X4C2__XOP, k_eq_8_subtile) {
@@ -64806,7 +64806,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64824,7 +64824,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64841,7 +64841,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64857,7 +64857,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64874,7 +64874,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64893,7 +64893,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64911,7 +64911,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64928,7 +64928,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64947,7 +64947,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64965,7 +64965,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64982,7 +64982,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65001,7 +65001,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65020,7 +65020,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65039,7 +65039,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65058,7 +65058,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65078,7 +65078,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65097,7 +65097,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65116,7 +65116,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65135,7 +65135,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65155,7 +65155,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65177,7 +65177,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65195,7 +65195,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -65211,7 +65211,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cn) {
@@ -65225,7 +65225,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_strided_a) {
@@ -65239,7 +65239,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_subtile) {
@@ -65255,7 +65255,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65272,7 +65272,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65288,7 +65288,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65303,7 +65303,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65319,7 +65319,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65337,7 +65337,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65354,7 +65354,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65370,7 +65370,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65388,7 +65388,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65405,7 +65405,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65421,7 +65421,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65439,7 +65439,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65457,7 +65457,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65475,7 +65475,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65493,7 +65493,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65512,7 +65512,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65530,7 +65530,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65548,7 +65548,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65566,7 +65566,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65585,7 +65585,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65606,7 +65606,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65623,7 +65623,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, qmax) {
@@ -65637,7 +65637,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cm) {
@@ -65651,7 +65651,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -65667,7 +65667,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cn) {
@@ -65681,7 +65681,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_strided_a) {
@@ -65695,7 +65695,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_subtile) {
@@ -65711,7 +65711,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65728,7 +65728,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65744,7 +65744,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65759,7 +65759,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65775,7 +65775,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65793,7 +65793,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65810,7 +65810,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65826,7 +65826,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65844,7 +65844,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65861,7 +65861,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65877,7 +65877,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65895,7 +65895,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65913,7 +65913,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65931,7 +65931,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65949,7 +65949,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65968,7 +65968,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65986,7 +65986,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66004,7 +66004,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66022,7 +66022,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66041,7 +66041,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66062,7 +66062,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66079,7 +66079,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, qmax) {
@@ -66093,7 +66093,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cm) {
@@ -66107,7 +66107,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -66123,7 +66123,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cn) {
@@ -66137,7 +66137,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_strided_a) {
@@ -66151,7 +66151,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_subtile) {
@@ -66167,7 +66167,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66184,7 +66184,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66200,7 +66200,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66215,7 +66215,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66231,7 +66231,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66249,7 +66249,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66266,7 +66266,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66282,7 +66282,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66300,7 +66300,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66317,7 +66317,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66333,7 +66333,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66351,7 +66351,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66369,7 +66369,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66387,7 +66387,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66405,7 +66405,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66424,7 +66424,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66442,7 +66442,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66460,7 +66460,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66478,7 +66478,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66497,7 +66497,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66518,7 +66518,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66535,7 +66535,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, qmax) {
@@ -66549,7 +66549,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cm) {
@@ -66563,7 +66563,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -66579,7 +66579,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cn) {
@@ -66593,7 +66593,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_strided_a) {
@@ -66607,7 +66607,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_subtile) {
@@ -66623,7 +66623,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66640,7 +66640,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66656,7 +66656,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66671,7 +66671,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66687,7 +66687,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66705,7 +66705,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66722,7 +66722,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66738,7 +66738,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66756,7 +66756,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66773,7 +66773,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66789,7 +66789,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66807,7 +66807,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66825,7 +66825,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66843,7 +66843,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66861,7 +66861,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66880,7 +66880,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66898,7 +66898,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66916,7 +66916,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66934,7 +66934,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66953,7 +66953,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66974,7 +66974,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66991,7 +66991,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, qmax) {
@@ -67005,7 +67005,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cm) {
@@ -67019,7 +67019,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -67035,7 +67035,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cn) {
@@ -67049,7 +67049,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_strided_a) {
@@ -67063,7 +67063,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_subtile) {
@@ -67079,7 +67079,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67096,7 +67096,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67112,7 +67112,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67127,7 +67127,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67143,7 +67143,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67161,7 +67161,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67178,7 +67178,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67194,7 +67194,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67212,7 +67212,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67229,7 +67229,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67245,7 +67245,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67263,7 +67263,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67281,7 +67281,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67299,7 +67299,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67317,7 +67317,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67336,7 +67336,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67354,7 +67354,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67372,7 +67372,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67390,7 +67390,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67409,7 +67409,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67430,7 +67430,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67447,7 +67447,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, qmax) {
@@ -67461,7 +67461,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cm) {
@@ -67475,7 +67475,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -67491,7 +67491,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cn) {
@@ -67505,7 +67505,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_strided_a) {
@@ -67519,7 +67519,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_subtile) {
@@ -67535,7 +67535,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67552,7 +67552,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67568,7 +67568,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67583,7 +67583,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67599,7 +67599,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67617,7 +67617,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67634,7 +67634,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67650,7 +67650,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67668,7 +67668,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67685,7 +67685,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67701,7 +67701,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67719,7 +67719,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67737,7 +67737,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67755,7 +67755,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67773,7 +67773,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67792,7 +67792,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67810,7 +67810,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67828,7 +67828,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67846,7 +67846,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67865,7 +67865,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67886,7 +67886,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67903,7 +67903,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, qmax) {
@@ -67917,7 +67917,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cm) {
@@ -67931,7 +67931,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -67947,7 +67947,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cn) {
@@ -67961,7 +67961,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_strided_a) {
@@ -67975,7 +67975,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_subtile) {
@@ -67991,7 +67991,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68008,7 +68008,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68024,7 +68024,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68039,7 +68039,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68055,7 +68055,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68073,7 +68073,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68090,7 +68090,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68106,7 +68106,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68124,7 +68124,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68141,7 +68141,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68157,7 +68157,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68175,7 +68175,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68193,7 +68193,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68211,7 +68211,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68229,7 +68229,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68248,7 +68248,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68266,7 +68266,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68284,7 +68284,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68302,7 +68302,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68321,7 +68321,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68342,7 +68342,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68359,7 +68359,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, qmax) {
@@ -68373,7 +68373,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cm) {
@@ -68387,7 +68387,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -68403,7 +68403,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cn) {
@@ -68417,7 +68417,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_strided_a) {
@@ -68431,7 +68431,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_subtile) {
@@ -68447,7 +68447,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68464,7 +68464,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68480,7 +68480,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68495,7 +68495,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68511,7 +68511,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68529,7 +68529,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68546,7 +68546,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68562,7 +68562,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68580,7 +68580,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68597,7 +68597,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68613,7 +68613,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68631,7 +68631,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68649,7 +68649,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68667,7 +68667,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68685,7 +68685,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68704,7 +68704,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68722,7 +68722,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68740,7 +68740,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68758,7 +68758,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68777,7 +68777,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68798,7 +68798,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68815,7 +68815,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, qmax) {
@@ -68829,7 +68829,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cm) {
@@ -68843,7 +68843,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -68859,7 +68859,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cn) {
@@ -68873,7 +68873,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_strided_a) {
@@ -68887,7 +68887,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_subtile) {
@@ -68903,7 +68903,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68920,7 +68920,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68936,7 +68936,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68951,7 +68951,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68967,7 +68967,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68985,7 +68985,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69002,7 +69002,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69018,7 +69018,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69036,7 +69036,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69053,7 +69053,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69069,7 +69069,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69087,7 +69087,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69105,7 +69105,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69123,7 +69123,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69141,7 +69141,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69160,7 +69160,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69178,7 +69178,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69196,7 +69196,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69214,7 +69214,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69233,7 +69233,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69254,7 +69254,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69271,7 +69271,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, qmax) {
@@ -69285,7 +69285,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cm) {
@@ -69299,7 +69299,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -69315,7 +69315,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cn) {
@@ -69329,7 +69329,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_strided_a) {
@@ -69343,7 +69343,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_subtile) {
@@ -69359,7 +69359,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69376,7 +69376,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69392,7 +69392,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69407,7 +69407,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69423,7 +69423,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69441,7 +69441,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69458,7 +69458,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69474,7 +69474,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69492,7 +69492,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69509,7 +69509,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69525,7 +69525,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69543,7 +69543,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69561,7 +69561,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69579,7 +69579,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69597,7 +69597,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69616,7 +69616,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69634,7 +69634,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69652,7 +69652,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69670,7 +69670,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69689,7 +69689,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69710,7 +69710,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69727,7 +69727,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, qmax) {
@@ -69741,7 +69741,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cm) {
@@ -69755,7 +69755,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -69771,7 +69771,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cn) {
@@ -69785,7 +69785,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_strided_a) {
@@ -69799,7 +69799,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_subtile) {
@@ -69815,7 +69815,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69832,7 +69832,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69848,7 +69848,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69863,7 +69863,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69879,7 +69879,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69897,7 +69897,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69914,7 +69914,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69930,7 +69930,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69948,7 +69948,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69965,7 +69965,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69981,7 +69981,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69999,7 +69999,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70017,7 +70017,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70035,7 +70035,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70053,7 +70053,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70072,7 +70072,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70090,7 +70090,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70108,7 +70108,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70126,7 +70126,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70145,7 +70145,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70166,7 +70166,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70183,7 +70183,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, qmax) {
@@ -70197,7 +70197,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cm) {
@@ -70211,7 +70211,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -70227,7 +70227,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cn) {
@@ -70241,7 +70241,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_strided_a) {
@@ -70255,7 +70255,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_subtile) {
@@ -70271,7 +70271,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70288,7 +70288,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70304,7 +70304,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70319,7 +70319,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70335,7 +70335,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70353,7 +70353,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70370,7 +70370,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70386,7 +70386,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70404,7 +70404,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70421,7 +70421,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70437,7 +70437,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70455,7 +70455,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70473,7 +70473,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70491,7 +70491,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70509,7 +70509,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70528,7 +70528,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70546,7 +70546,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70564,7 +70564,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70582,7 +70582,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70601,7 +70601,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70622,7 +70622,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70639,7 +70639,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, qmax) {
@@ -70653,7 +70653,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cm) {
@@ -70667,7 +70667,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -70683,7 +70683,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cn) {
@@ -70697,7 +70697,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_strided_a) {
@@ -70711,7 +70711,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_subtile) {
@@ -70727,7 +70727,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70744,7 +70744,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70760,7 +70760,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70775,7 +70775,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70791,7 +70791,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70809,7 +70809,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70826,7 +70826,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70842,7 +70842,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70860,7 +70860,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70877,7 +70877,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70893,7 +70893,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70911,7 +70911,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70929,7 +70929,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70947,7 +70947,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70965,7 +70965,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70984,7 +70984,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71002,7 +71002,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71020,7 +71020,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71038,7 +71038,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71057,7 +71057,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71078,7 +71078,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71095,7 +71095,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, qmax) {
@@ -71109,7 +71109,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cm) {
@@ -71123,7 +71123,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -71139,7 +71139,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cn) {
@@ -71153,7 +71153,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_strided_a) {
@@ -71167,7 +71167,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_subtile) {
@@ -71183,7 +71183,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71200,7 +71200,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71216,7 +71216,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71231,7 +71231,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71247,7 +71247,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71265,7 +71265,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71282,7 +71282,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71298,7 +71298,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71316,7 +71316,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71333,7 +71333,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71349,7 +71349,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71367,7 +71367,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71385,7 +71385,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71403,7 +71403,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71421,7 +71421,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71440,7 +71440,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71458,7 +71458,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71476,7 +71476,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71494,7 +71494,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71513,7 +71513,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71534,7 +71534,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71551,7 +71551,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, qmax) {
@@ -71565,7 +71565,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cm) {
@@ -71579,7 +71579,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -71595,7 +71595,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cn) {
@@ -71609,7 +71609,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_strided_a) {
@@ -71623,7 +71623,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_subtile) {
@@ -71639,7 +71639,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71656,7 +71656,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71672,7 +71672,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71687,7 +71687,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71703,7 +71703,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71721,7 +71721,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71738,7 +71738,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71754,7 +71754,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71772,7 +71772,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71789,7 +71789,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71805,7 +71805,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71823,7 +71823,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71841,7 +71841,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71859,7 +71859,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71877,7 +71877,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71896,7 +71896,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71914,7 +71914,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71932,7 +71932,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71950,7 +71950,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71969,7 +71969,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71990,7 +71990,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72007,7 +72007,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, qmax) {
@@ -72021,7 +72021,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cm) {
@@ -72035,7 +72035,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -72051,7 +72051,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cn) {
@@ -72065,7 +72065,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_strided_a) {
@@ -72079,7 +72079,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_subtile) {
@@ -72095,7 +72095,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72112,7 +72112,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72128,7 +72128,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72143,7 +72143,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72159,7 +72159,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72177,7 +72177,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72194,7 +72194,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72210,7 +72210,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72228,7 +72228,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72245,7 +72245,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72261,7 +72261,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72279,7 +72279,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72297,7 +72297,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72315,7 +72315,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72333,7 +72333,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72352,7 +72352,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72370,7 +72370,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72388,7 +72388,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72406,7 +72406,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72425,7 +72425,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72446,7 +72446,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72463,7 +72463,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, qmax) {
@@ -72477,7 +72477,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cm) {
@@ -72491,7 +72491,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -72507,7 +72507,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cn) {
@@ -72521,7 +72521,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_strided_a) {
@@ -72535,7 +72535,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_subtile) {
@@ -72551,7 +72551,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72568,7 +72568,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72584,7 +72584,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72599,7 +72599,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72615,7 +72615,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72633,7 +72633,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72650,7 +72650,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72666,7 +72666,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72684,7 +72684,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72701,7 +72701,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72717,7 +72717,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72735,7 +72735,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72753,7 +72753,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72771,7 +72771,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72789,7 +72789,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72808,7 +72808,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72826,7 +72826,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72844,7 +72844,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72862,7 +72862,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72881,7 +72881,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72902,7 +72902,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72919,7 +72919,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, qmax) {
@@ -72933,7 +72933,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cm) {
@@ -72947,7 +72947,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -72963,7 +72963,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cn) {
@@ -72977,7 +72977,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_strided_a) {
@@ -72991,7 +72991,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_subtile) {
@@ -73007,7 +73007,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73024,7 +73024,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73040,7 +73040,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73055,7 +73055,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73071,7 +73071,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73089,7 +73089,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73106,7 +73106,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73122,7 +73122,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73140,7 +73140,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73157,7 +73157,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73173,7 +73173,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73191,7 +73191,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73209,7 +73209,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73227,7 +73227,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73245,7 +73245,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73264,7 +73264,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73282,7 +73282,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73300,7 +73300,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73318,7 +73318,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73337,7 +73337,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73358,7 +73358,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73375,7 +73375,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, qmax) {
@@ -73389,7 +73389,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cm) {
@@ -73403,7 +73403,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -73419,7 +73419,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cn) {
@@ -73433,7 +73433,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_strided_a) {
@@ -73447,7 +73447,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_subtile) {
@@ -73463,7 +73463,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73480,7 +73480,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73496,7 +73496,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73511,7 +73511,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73527,7 +73527,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73545,7 +73545,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73562,7 +73562,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73578,7 +73578,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73596,7 +73596,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73613,7 +73613,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73629,7 +73629,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73647,7 +73647,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73665,7 +73665,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73683,7 +73683,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73701,7 +73701,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73720,7 +73720,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73738,7 +73738,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73756,7 +73756,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73774,7 +73774,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73793,7 +73793,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73814,7 +73814,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73831,7 +73831,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, qmax) {
@@ -73845,7 +73845,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cm) {
@@ -73859,7 +73859,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -73875,7 +73875,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cn) {
@@ -73889,7 +73889,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_strided_a) {
@@ -73903,7 +73903,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_subtile) {
@@ -73919,7 +73919,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73936,7 +73936,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73952,7 +73952,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73967,7 +73967,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73983,7 +73983,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74001,7 +74001,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74018,7 +74018,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74034,7 +74034,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74052,7 +74052,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74069,7 +74069,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74085,7 +74085,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74103,7 +74103,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74121,7 +74121,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74139,7 +74139,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74157,7 +74157,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74176,7 +74176,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74194,7 +74194,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74212,7 +74212,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74230,7 +74230,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74249,7 +74249,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74270,7 +74270,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74287,7 +74287,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, qmax) {
@@ -74301,7 +74301,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cm) {
@@ -74315,7 +74315,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -74331,7 +74331,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cn) {
@@ -74345,7 +74345,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_strided_a) {
@@ -74359,7 +74359,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_subtile) {
@@ -74375,7 +74375,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74392,7 +74392,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74408,7 +74408,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74423,7 +74423,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74439,7 +74439,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74457,7 +74457,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74474,7 +74474,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74490,7 +74490,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74508,7 +74508,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74525,7 +74525,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74541,7 +74541,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74559,7 +74559,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74577,7 +74577,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74595,7 +74595,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74613,7 +74613,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74632,7 +74632,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74650,7 +74650,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74668,7 +74668,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74686,7 +74686,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74705,7 +74705,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74726,7 +74726,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74743,7 +74743,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, qmax) {
@@ -74757,7 +74757,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cm) {
@@ -74771,7 +74771,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -74787,7 +74787,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cn) {
@@ -74801,7 +74801,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_strided_a) {
@@ -74815,7 +74815,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_subtile) {
@@ -74831,7 +74831,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74848,7 +74848,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74864,7 +74864,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74879,7 +74879,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74895,7 +74895,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74913,7 +74913,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74930,7 +74930,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74946,7 +74946,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74964,7 +74964,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74981,7 +74981,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74997,7 +74997,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75015,7 +75015,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75033,7 +75033,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75051,7 +75051,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75069,7 +75069,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75088,7 +75088,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75106,7 +75106,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75124,7 +75124,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75142,7 +75142,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75161,7 +75161,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75182,7 +75182,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75199,7 +75199,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, qmax) {
@@ -75213,7 +75213,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cm) {
@@ -75227,7 +75227,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -75243,7 +75243,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cn) {
@@ -75257,7 +75257,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_strided_a) {
@@ -75271,7 +75271,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_subtile) {
@@ -75287,7 +75287,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75304,7 +75304,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75320,7 +75320,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75335,7 +75335,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75351,7 +75351,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75369,7 +75369,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75386,7 +75386,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75402,7 +75402,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75420,7 +75420,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75437,7 +75437,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75453,7 +75453,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75471,7 +75471,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75489,7 +75489,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75507,7 +75507,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75525,7 +75525,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75544,7 +75544,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75562,7 +75562,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75580,7 +75580,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75598,7 +75598,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75617,7 +75617,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75638,7 +75638,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75655,7 +75655,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, qmax) {
@@ -75669,7 +75669,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cm) {
@@ -75683,7 +75683,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -75699,7 +75699,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cn) {
@@ -75713,7 +75713,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_strided_a) {
@@ -75727,7 +75727,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_subtile) {
@@ -75743,7 +75743,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75760,7 +75760,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75776,7 +75776,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75791,7 +75791,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75807,7 +75807,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75825,7 +75825,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75842,7 +75842,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75858,7 +75858,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75876,7 +75876,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75893,7 +75893,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75909,7 +75909,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75927,7 +75927,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75945,7 +75945,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75963,7 +75963,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75981,7 +75981,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76000,7 +76000,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76018,7 +76018,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76036,7 +76036,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76054,7 +76054,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76073,7 +76073,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76094,7 +76094,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76111,7 +76111,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, qmax) {
@@ -76125,7 +76125,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cm) {
@@ -76139,7 +76139,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -76155,7 +76155,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cn) {
@@ -76169,7 +76169,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_strided_a) {
@@ -76183,7 +76183,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_subtile) {
@@ -76199,7 +76199,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76216,7 +76216,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76232,7 +76232,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76247,7 +76247,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76263,7 +76263,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76281,7 +76281,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76298,7 +76298,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76314,7 +76314,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76332,7 +76332,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76349,7 +76349,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76365,7 +76365,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76383,7 +76383,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76401,7 +76401,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76419,7 +76419,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76437,7 +76437,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76456,7 +76456,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76474,7 +76474,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76492,7 +76492,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76510,7 +76510,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76529,7 +76529,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76550,7 +76550,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76567,7 +76567,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, qmax) {
@@ -76581,7 +76581,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cm) {
@@ -76595,7 +76595,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -76611,7 +76611,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cn) {
@@ -76625,7 +76625,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_strided_a) {
@@ -76639,7 +76639,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_subtile) {
@@ -76655,7 +76655,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76672,7 +76672,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76688,7 +76688,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76703,7 +76703,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76719,7 +76719,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76737,7 +76737,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76754,7 +76754,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76770,7 +76770,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76788,7 +76788,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76805,7 +76805,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76821,7 +76821,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76839,7 +76839,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76857,7 +76857,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76875,7 +76875,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76893,7 +76893,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76912,7 +76912,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76930,7 +76930,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76948,7 +76948,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76966,7 +76966,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76985,7 +76985,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77006,7 +77006,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77023,7 +77023,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, qmax) {
@@ -77037,7 +77037,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cm) {
@@ -77051,7 +77051,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -77067,7 +77067,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cn) {
@@ -77081,7 +77081,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_strided_a) {
@@ -77095,7 +77095,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_subtile) {
@@ -77111,7 +77111,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77128,7 +77128,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77144,7 +77144,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77159,7 +77159,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77175,7 +77175,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77193,7 +77193,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77210,7 +77210,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77226,7 +77226,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77244,7 +77244,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77261,7 +77261,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77277,7 +77277,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77295,7 +77295,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77313,7 +77313,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77331,7 +77331,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77349,7 +77349,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77368,7 +77368,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77386,7 +77386,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77404,7 +77404,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77422,7 +77422,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77441,7 +77441,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77462,7 +77462,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77479,7 +77479,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, qmax) {
@@ -77493,7 +77493,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cm) {
@@ -77507,7 +77507,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -77523,7 +77523,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cn) {
@@ -77537,7 +77537,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_strided_a) {
@@ -77551,7 +77551,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_subtile) {
@@ -77567,7 +77567,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77584,7 +77584,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77600,7 +77600,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77615,7 +77615,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77631,7 +77631,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77649,7 +77649,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77666,7 +77666,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77682,7 +77682,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77700,7 +77700,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77717,7 +77717,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77733,7 +77733,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77751,7 +77751,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77769,7 +77769,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77787,7 +77787,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77805,7 +77805,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77824,7 +77824,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77842,7 +77842,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77860,7 +77860,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77878,7 +77878,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77897,7 +77897,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77918,7 +77918,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77935,7 +77935,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, qmax) {
@@ -77949,7 +77949,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cm) {
@@ -77963,7 +77963,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -77979,7 +77979,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cn) {
@@ -77993,7 +77993,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_strided_a) {
@@ -78007,7 +78007,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_subtile) {
@@ -78023,7 +78023,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78040,7 +78040,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78056,7 +78056,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78071,7 +78071,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78087,7 +78087,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78105,7 +78105,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78122,7 +78122,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78138,7 +78138,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78156,7 +78156,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78173,7 +78173,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78189,7 +78189,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78207,7 +78207,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78225,7 +78225,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78243,7 +78243,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78261,7 +78261,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78280,7 +78280,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78298,7 +78298,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78316,7 +78316,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78334,7 +78334,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78353,7 +78353,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78374,7 +78374,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78391,7 +78391,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, qmax) {
@@ -78405,7 +78405,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cm) {
@@ -78419,7 +78419,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -78435,7 +78435,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cn) {
@@ -78449,7 +78449,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_strided_a) {
@@ -78463,7 +78463,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_subtile) {
@@ -78479,7 +78479,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78496,7 +78496,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78512,7 +78512,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78527,7 +78527,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78543,7 +78543,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78561,7 +78561,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78578,7 +78578,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78594,7 +78594,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78612,7 +78612,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78629,7 +78629,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78645,7 +78645,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78663,7 +78663,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78681,7 +78681,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78699,7 +78699,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78717,7 +78717,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78736,7 +78736,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78754,7 +78754,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78772,7 +78772,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78790,7 +78790,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78809,7 +78809,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78830,7 +78830,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78847,7 +78847,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, qmax) {
@@ -78861,7 +78861,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cm) {
@@ -78875,7 +78875,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -78892,7 +78892,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, strided_cn) {
@@ -78907,7 +78907,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_eq_8_strided_a) {
@@ -78922,7 +78922,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE2, k_eq_8_subtile) {
@@ -78939,7 +78939,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78957,7 +78957,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78974,7 +78974,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78990,7 +78990,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79007,7 +79007,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79026,7 +79026,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79044,7 +79044,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79061,7 +79061,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79080,7 +79080,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79098,7 +79098,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79115,7 +79115,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79134,7 +79134,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79153,7 +79153,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79172,7 +79172,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79191,7 +79191,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79211,7 +79211,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79230,7 +79230,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79249,7 +79249,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79268,7 +79268,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79288,7 +79288,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79310,7 +79310,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79328,7 +79328,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -79345,7 +79345,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, strided_cn) {
@@ -79360,7 +79360,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_eq_8_strided_a) {
@@ -79375,7 +79375,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE2, k_eq_8_subtile) {
@@ -79392,7 +79392,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79410,7 +79410,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79427,7 +79427,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79443,7 +79443,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79460,7 +79460,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79479,7 +79479,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79497,7 +79497,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79514,7 +79514,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79533,7 +79533,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79551,7 +79551,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79568,7 +79568,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79587,7 +79587,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79606,7 +79606,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79625,7 +79625,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79644,7 +79644,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79664,7 +79664,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79683,7 +79683,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79702,7 +79702,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79721,7 +79721,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79741,7 +79741,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79763,7 +79763,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79781,7 +79781,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -79798,7 +79798,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, strided_cn) {
@@ -79813,7 +79813,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_eq_8_strided_a) {
@@ -79828,7 +79828,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE2, k_eq_8_subtile) {
@@ -79845,7 +79845,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79863,7 +79863,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79880,7 +79880,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79896,7 +79896,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79913,7 +79913,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79932,7 +79932,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -79950,7 +79950,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79967,7 +79967,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -79986,7 +79986,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80004,7 +80004,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80021,7 +80021,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80040,7 +80040,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80059,7 +80059,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80078,7 +80078,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80097,7 +80097,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80117,7 +80117,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80136,7 +80136,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80155,7 +80155,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80174,7 +80174,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80194,7 +80194,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80216,7 +80216,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80234,7 +80234,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -80251,7 +80251,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, strided_cn) {
@@ -80266,7 +80266,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_eq_8_strided_a) {
@@ -80281,7 +80281,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSSE3, k_eq_8_subtile) {
@@ -80298,7 +80298,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80316,7 +80316,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80333,7 +80333,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80349,7 +80349,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80366,7 +80366,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80385,7 +80385,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80403,7 +80403,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80420,7 +80420,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80439,7 +80439,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80457,7 +80457,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80474,7 +80474,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80493,7 +80493,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80512,7 +80512,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80531,7 +80531,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80550,7 +80550,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80570,7 +80570,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80589,7 +80589,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80608,7 +80608,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80627,7 +80627,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80647,7 +80647,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80669,7 +80669,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80687,7 +80687,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -80704,7 +80704,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, strided_cn) {
@@ -80719,7 +80719,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_eq_8_strided_a) {
@@ -80734,7 +80734,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSSE3, k_eq_8_subtile) {
@@ -80751,7 +80751,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80769,7 +80769,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80786,7 +80786,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80802,7 +80802,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80819,7 +80819,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80838,7 +80838,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80856,7 +80856,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80873,7 +80873,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80892,7 +80892,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80910,7 +80910,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80927,7 +80927,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -80946,7 +80946,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -80965,7 +80965,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80984,7 +80984,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81003,7 +81003,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81023,7 +81023,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81042,7 +81042,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81061,7 +81061,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81080,7 +81080,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81100,7 +81100,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81122,7 +81122,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81140,7 +81140,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -81157,7 +81157,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, strided_cn) {
@@ -81172,7 +81172,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_eq_8_strided_a) {
@@ -81187,7 +81187,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSSE3, k_eq_8_subtile) {
@@ -81204,7 +81204,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81222,7 +81222,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81239,7 +81239,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81255,7 +81255,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81272,7 +81272,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81291,7 +81291,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81309,7 +81309,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81326,7 +81326,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81345,7 +81345,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81363,7 +81363,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81380,7 +81380,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81399,7 +81399,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81418,7 +81418,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81437,7 +81437,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81456,7 +81456,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81476,7 +81476,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81495,7 +81495,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81514,7 +81514,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81533,7 +81533,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81553,7 +81553,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81575,7 +81575,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81593,7 +81593,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -81610,7 +81610,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, strided_cn) {
@@ -81625,7 +81625,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_eq_8_strided_a) {
@@ -81640,7 +81640,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__SSE41, k_eq_8_subtile) {
@@ -81657,7 +81657,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81675,7 +81675,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81692,7 +81692,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81708,7 +81708,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81725,7 +81725,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81744,7 +81744,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81762,7 +81762,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81779,7 +81779,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81798,7 +81798,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81816,7 +81816,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81833,7 +81833,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -81852,7 +81852,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81871,7 +81871,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81890,7 +81890,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81909,7 +81909,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81929,7 +81929,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -81948,7 +81948,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81967,7 +81967,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81986,7 +81986,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82006,7 +82006,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82028,7 +82028,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82046,7 +82046,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -82063,7 +82063,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, strided_cn) {
@@ -82078,7 +82078,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_eq_8_strided_a) {
@@ -82093,7 +82093,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__SSE41, k_eq_8_subtile) {
@@ -82110,7 +82110,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82128,7 +82128,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82145,7 +82145,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82161,7 +82161,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82178,7 +82178,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82197,7 +82197,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82215,7 +82215,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82232,7 +82232,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82251,7 +82251,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82269,7 +82269,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82286,7 +82286,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82305,7 +82305,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82324,7 +82324,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82343,7 +82343,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82362,7 +82362,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82382,7 +82382,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82401,7 +82401,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82420,7 +82420,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82439,7 +82439,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82459,7 +82459,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82481,7 +82481,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82499,7 +82499,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -82516,7 +82516,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, strided_cn) {
@@ -82531,7 +82531,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_eq_8_strided_a) {
@@ -82546,7 +82546,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__SSE41, k_eq_8_subtile) {
@@ -82563,7 +82563,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82581,7 +82581,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82598,7 +82598,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82614,7 +82614,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82631,7 +82631,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82650,7 +82650,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82668,7 +82668,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82685,7 +82685,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82704,7 +82704,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82722,7 +82722,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82739,7 +82739,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -82758,7 +82758,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82777,7 +82777,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82796,7 +82796,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82815,7 +82815,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82835,7 +82835,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82854,7 +82854,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82873,7 +82873,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82892,7 +82892,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -82912,7 +82912,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82934,7 +82934,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -82952,7 +82952,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -82969,7 +82969,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, strided_cn) {
@@ -82984,7 +82984,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_eq_8_strided_a) {
@@ -82999,7 +82999,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__AVX, k_eq_8_subtile) {
@@ -83016,7 +83016,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83034,7 +83034,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83051,7 +83051,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83067,7 +83067,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83084,7 +83084,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83103,7 +83103,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83121,7 +83121,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83138,7 +83138,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83157,7 +83157,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83175,7 +83175,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83192,7 +83192,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83211,7 +83211,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83230,7 +83230,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83249,7 +83249,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83268,7 +83268,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83288,7 +83288,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83307,7 +83307,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83326,7 +83326,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83345,7 +83345,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83365,7 +83365,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83387,7 +83387,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83405,7 +83405,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -83422,7 +83422,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, strided_cn) {
@@ -83437,7 +83437,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_eq_8_strided_a) {
@@ -83452,7 +83452,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__AVX, k_eq_8_subtile) {
@@ -83469,7 +83469,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83487,7 +83487,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83504,7 +83504,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83520,7 +83520,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83537,7 +83537,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83556,7 +83556,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83574,7 +83574,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83591,7 +83591,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83610,7 +83610,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83628,7 +83628,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83645,7 +83645,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83664,7 +83664,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83683,7 +83683,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83702,7 +83702,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83721,7 +83721,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83741,7 +83741,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83760,7 +83760,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83779,7 +83779,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83798,7 +83798,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83818,7 +83818,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83840,7 +83840,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -83858,7 +83858,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -83875,7 +83875,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, strided_cn) {
@@ -83890,7 +83890,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_eq_8_strided_a) {
@@ -83905,7 +83905,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__AVX, k_eq_8_subtile) {
@@ -83922,7 +83922,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -83940,7 +83940,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83957,7 +83957,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83973,7 +83973,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -83990,7 +83990,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84009,7 +84009,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84027,7 +84027,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84044,7 +84044,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84063,7 +84063,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84081,7 +84081,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84098,7 +84098,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84117,7 +84117,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84136,7 +84136,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84155,7 +84155,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84174,7 +84174,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84194,7 +84194,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84213,7 +84213,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84232,7 +84232,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84251,7 +84251,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84271,7 +84271,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84293,7 +84293,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84311,7 +84311,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -84328,7 +84328,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, strided_cn) {
@@ -84343,7 +84343,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_eq_8_strided_a) {
@@ -84358,7 +84358,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__XOP, k_eq_8_subtile) {
@@ -84375,7 +84375,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84393,7 +84393,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84410,7 +84410,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84426,7 +84426,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84443,7 +84443,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84462,7 +84462,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84480,7 +84480,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84497,7 +84497,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84516,7 +84516,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84534,7 +84534,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84551,7 +84551,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84570,7 +84570,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84589,7 +84589,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84608,7 +84608,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84627,7 +84627,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84647,7 +84647,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84666,7 +84666,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84685,7 +84685,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84704,7 +84704,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84724,7 +84724,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84746,7 +84746,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84764,7 +84764,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -84781,7 +84781,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, strided_cn) {
@@ -84796,7 +84796,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_eq_8_strided_a) {
@@ -84811,7 +84811,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__XOP, k_eq_8_subtile) {
@@ -84828,7 +84828,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -84846,7 +84846,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84863,7 +84863,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84879,7 +84879,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84896,7 +84896,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84915,7 +84915,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84933,7 +84933,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84950,7 +84950,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -84969,7 +84969,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -84987,7 +84987,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85004,7 +85004,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85023,7 +85023,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85042,7 +85042,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85061,7 +85061,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85080,7 +85080,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85100,7 +85100,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85119,7 +85119,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85138,7 +85138,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85157,7 +85157,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85177,7 +85177,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85199,7 +85199,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85217,7 +85217,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -85234,7 +85234,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, strided_cn) {
@@ -85249,7 +85249,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_eq_8_strided_a) {
@@ -85264,7 +85264,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__XOP, k_eq_8_subtile) {
@@ -85281,7 +85281,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85299,7 +85299,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85316,7 +85316,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85332,7 +85332,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85349,7 +85349,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85368,7 +85368,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85386,7 +85386,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85403,7 +85403,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85422,7 +85422,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85440,7 +85440,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85457,7 +85457,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85476,7 +85476,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85495,7 +85495,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85514,7 +85514,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85533,7 +85533,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85553,7 +85553,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85572,7 +85572,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85591,7 +85591,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85610,7 +85610,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85630,7 +85630,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85652,7 +85652,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85670,7 +85670,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -85686,7 +85686,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cn) {
@@ -85700,7 +85700,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_strided_a) {
@@ -85714,7 +85714,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile) {
@@ -85730,7 +85730,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85747,7 +85747,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85763,7 +85763,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85778,7 +85778,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85794,7 +85794,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85812,7 +85812,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85829,7 +85829,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85845,7 +85845,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85863,7 +85863,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85880,7 +85880,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85896,7 +85896,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -85914,7 +85914,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -85932,7 +85932,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85950,7 +85950,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85968,7 +85968,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -85987,7 +85987,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86005,7 +86005,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86023,7 +86023,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86041,7 +86041,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86060,7 +86060,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86081,7 +86081,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86098,7 +86098,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, qmax) {
@@ -86112,7 +86112,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cm) {
@@ -86126,7 +86126,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -86142,7 +86142,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cn) {
@@ -86156,7 +86156,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_strided_a) {
@@ -86170,7 +86170,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile) {
@@ -86186,7 +86186,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86203,7 +86203,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86219,7 +86219,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86234,7 +86234,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86250,7 +86250,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86268,7 +86268,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86285,7 +86285,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86301,7 +86301,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86319,7 +86319,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86336,7 +86336,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86352,7 +86352,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86370,7 +86370,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86388,7 +86388,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86406,7 +86406,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86424,7 +86424,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86443,7 +86443,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86461,7 +86461,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86479,7 +86479,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86497,7 +86497,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86516,7 +86516,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86537,7 +86537,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86554,7 +86554,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, qmax) {
@@ -86568,7 +86568,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cm) {
@@ -86582,7 +86582,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -86598,7 +86598,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cn) {
@@ -86612,7 +86612,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_strided_a) {
@@ -86626,7 +86626,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile) {
@@ -86642,7 +86642,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86659,7 +86659,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86675,7 +86675,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86690,7 +86690,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86706,7 +86706,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86724,7 +86724,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86741,7 +86741,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86757,7 +86757,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86775,7 +86775,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86792,7 +86792,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86808,7 +86808,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -86826,7 +86826,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86844,7 +86844,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86862,7 +86862,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86880,7 +86880,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86899,7 +86899,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86917,7 +86917,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86935,7 +86935,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86953,7 +86953,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -86972,7 +86972,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -86993,7 +86993,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87010,7 +87010,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, qmax) {
@@ -87024,7 +87024,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cm) {
@@ -87038,7 +87038,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -87055,7 +87055,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cn) {
@@ -87070,7 +87070,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_strided_a) {
@@ -87085,7 +87085,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile) {
@@ -87102,7 +87102,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87120,7 +87120,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87137,7 +87137,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87153,7 +87153,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87170,7 +87170,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87189,7 +87189,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87207,7 +87207,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87224,7 +87224,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87243,7 +87243,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87261,7 +87261,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87278,7 +87278,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87297,7 +87297,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87316,7 +87316,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87335,7 +87335,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87354,7 +87354,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87374,7 +87374,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87393,7 +87393,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87412,7 +87412,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87431,7 +87431,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87451,7 +87451,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87473,7 +87473,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87491,7 +87491,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -87508,7 +87508,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cn) {
@@ -87523,7 +87523,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_strided_a) {
@@ -87538,7 +87538,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile) {
@@ -87555,7 +87555,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87573,7 +87573,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87590,7 +87590,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87606,7 +87606,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87623,7 +87623,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87642,7 +87642,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87660,7 +87660,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87677,7 +87677,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87696,7 +87696,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87714,7 +87714,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87731,7 +87731,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -87750,7 +87750,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87769,7 +87769,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87788,7 +87788,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87807,7 +87807,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87827,7 +87827,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87846,7 +87846,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87865,7 +87865,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87884,7 +87884,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -87904,7 +87904,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87926,7 +87926,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -87944,7 +87944,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -87961,7 +87961,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cn) {
@@ -87976,7 +87976,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_strided_a) {
@@ -87991,7 +87991,7 @@
       .n(8)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile) {
@@ -88008,7 +88008,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88026,7 +88026,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88043,7 +88043,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88059,7 +88059,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88076,7 +88076,7 @@
         .n(8)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88095,7 +88095,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88113,7 +88113,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88130,7 +88130,7 @@
         .n(8)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88149,7 +88149,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88167,7 +88167,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88184,7 +88184,7 @@
         .n(8)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88203,7 +88203,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88222,7 +88222,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88241,7 +88241,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88260,7 +88260,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88280,7 +88280,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88299,7 +88299,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88318,7 +88318,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88337,7 +88337,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88357,7 +88357,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88379,7 +88379,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88397,7 +88397,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -88413,7 +88413,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cn) {
@@ -88427,7 +88427,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_strided_a) {
@@ -88441,7 +88441,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_subtile) {
@@ -88457,7 +88457,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88474,7 +88474,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88490,7 +88490,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88505,7 +88505,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88521,7 +88521,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88539,7 +88539,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88556,7 +88556,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88572,7 +88572,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88590,7 +88590,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88607,7 +88607,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88623,7 +88623,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88641,7 +88641,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88659,7 +88659,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88677,7 +88677,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88695,7 +88695,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88714,7 +88714,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88732,7 +88732,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88750,7 +88750,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88768,7 +88768,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88787,7 +88787,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88808,7 +88808,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -88825,7 +88825,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, qmax) {
@@ -88839,7 +88839,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cm) {
@@ -88853,7 +88853,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -88869,7 +88869,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cn) {
@@ -88883,7 +88883,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_strided_a) {
@@ -88897,7 +88897,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_subtile) {
@@ -88913,7 +88913,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -88930,7 +88930,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88946,7 +88946,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88961,7 +88961,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88977,7 +88977,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -88995,7 +88995,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89012,7 +89012,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89028,7 +89028,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89046,7 +89046,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89063,7 +89063,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89079,7 +89079,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89097,7 +89097,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89115,7 +89115,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89133,7 +89133,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89151,7 +89151,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89170,7 +89170,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89188,7 +89188,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89206,7 +89206,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89224,7 +89224,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89243,7 +89243,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89264,7 +89264,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89281,7 +89281,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, qmax) {
@@ -89295,7 +89295,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cm) {
@@ -89309,7 +89309,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -89325,7 +89325,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cn) {
@@ -89339,7 +89339,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_strided_a) {
@@ -89353,7 +89353,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_subtile) {
@@ -89369,7 +89369,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89386,7 +89386,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89402,7 +89402,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89417,7 +89417,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89433,7 +89433,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89451,7 +89451,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89468,7 +89468,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89484,7 +89484,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89502,7 +89502,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89519,7 +89519,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89535,7 +89535,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89553,7 +89553,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89571,7 +89571,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89589,7 +89589,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89607,7 +89607,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89626,7 +89626,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89644,7 +89644,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89662,7 +89662,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89680,7 +89680,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89699,7 +89699,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89720,7 +89720,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89737,7 +89737,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, qmax) {
@@ -89751,7 +89751,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cm) {
@@ -89765,7 +89765,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -89781,7 +89781,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cn) {
@@ -89795,7 +89795,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_strided_a) {
@@ -89809,7 +89809,7 @@
       .n(16)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_subtile) {
@@ -89825,7 +89825,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -89842,7 +89842,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89858,7 +89858,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89873,7 +89873,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89889,7 +89889,7 @@
         .n(16)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89907,7 +89907,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89924,7 +89924,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89940,7 +89940,7 @@
         .n(16)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89958,7 +89958,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -89975,7 +89975,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -89991,7 +89991,7 @@
         .n(16)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90009,7 +90009,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90027,7 +90027,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90045,7 +90045,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90063,7 +90063,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90082,7 +90082,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90100,7 +90100,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90118,7 +90118,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90136,7 +90136,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90155,7 +90155,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90176,7 +90176,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90193,7 +90193,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, qmax) {
@@ -90207,7 +90207,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cm) {
@@ -90221,7 +90221,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -90236,7 +90236,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cn) {
@@ -90249,7 +90249,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_strided_a) {
@@ -90262,7 +90262,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
@@ -90277,7 +90277,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90293,7 +90293,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90308,7 +90308,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90322,7 +90322,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90337,7 +90337,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90354,7 +90354,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90370,7 +90370,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90385,7 +90385,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90402,7 +90402,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90418,7 +90418,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90433,7 +90433,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90450,7 +90450,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90467,7 +90467,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90484,7 +90484,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90501,7 +90501,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90519,7 +90519,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90536,7 +90536,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90553,7 +90553,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90570,7 +90570,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90588,7 +90588,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90608,7 +90608,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90624,7 +90624,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, qmax) {
@@ -90637,7 +90637,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cm) {
@@ -90650,7 +90650,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -90665,7 +90665,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cn) {
@@ -90678,7 +90678,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_strided_a) {
@@ -90691,7 +90691,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
@@ -90706,7 +90706,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90722,7 +90722,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90737,7 +90737,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90751,7 +90751,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90766,7 +90766,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90783,7 +90783,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90799,7 +90799,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90814,7 +90814,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90831,7 +90831,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90847,7 +90847,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90862,7 +90862,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -90879,7 +90879,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90896,7 +90896,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90913,7 +90913,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90930,7 +90930,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90948,7 +90948,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -90965,7 +90965,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90982,7 +90982,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -90999,7 +90999,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91017,7 +91017,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91037,7 +91037,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91053,7 +91053,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, qmax) {
@@ -91066,7 +91066,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cm) {
@@ -91079,7 +91079,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -91094,7 +91094,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cn) {
@@ -91107,7 +91107,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_strided_a) {
@@ -91120,7 +91120,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
@@ -91135,7 +91135,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91151,7 +91151,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91166,7 +91166,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91180,7 +91180,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91195,7 +91195,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91212,7 +91212,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91228,7 +91228,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91243,7 +91243,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91260,7 +91260,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91276,7 +91276,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91291,7 +91291,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91308,7 +91308,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91325,7 +91325,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91342,7 +91342,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91359,7 +91359,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91377,7 +91377,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91394,7 +91394,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91411,7 +91411,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91428,7 +91428,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91446,7 +91446,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91466,7 +91466,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91482,7 +91482,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, qmax) {
@@ -91495,7 +91495,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cm) {
@@ -91508,7 +91508,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -91523,7 +91523,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cn) {
@@ -91536,7 +91536,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_strided_a) {
@@ -91549,7 +91549,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
@@ -91564,7 +91564,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91580,7 +91580,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91595,7 +91595,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91609,7 +91609,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91624,7 +91624,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91641,7 +91641,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91657,7 +91657,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91672,7 +91672,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91689,7 +91689,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91705,7 +91705,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91720,7 +91720,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -91737,7 +91737,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91754,7 +91754,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91771,7 +91771,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91788,7 +91788,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91806,7 +91806,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91823,7 +91823,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91840,7 +91840,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91857,7 +91857,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -91875,7 +91875,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91895,7 +91895,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -91911,7 +91911,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, qmax) {
@@ -91924,7 +91924,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cm) {
@@ -91937,7 +91937,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -91952,7 +91952,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cn) {
@@ -91965,7 +91965,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_strided_a) {
@@ -91978,7 +91978,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
@@ -91993,7 +91993,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92009,7 +92009,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92024,7 +92024,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92038,7 +92038,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92053,7 +92053,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92070,7 +92070,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92086,7 +92086,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92101,7 +92101,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92118,7 +92118,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92134,7 +92134,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92149,7 +92149,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92166,7 +92166,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92183,7 +92183,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92200,7 +92200,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92217,7 +92217,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92235,7 +92235,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92252,7 +92252,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92269,7 +92269,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92286,7 +92286,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92304,7 +92304,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92324,7 +92324,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92340,7 +92340,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, qmax) {
@@ -92353,7 +92353,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cm) {
@@ -92366,7 +92366,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -92381,7 +92381,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cn) {
@@ -92394,7 +92394,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_strided_a) {
@@ -92407,7 +92407,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
@@ -92422,7 +92422,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92438,7 +92438,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92453,7 +92453,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92467,7 +92467,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92482,7 +92482,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92499,7 +92499,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92515,7 +92515,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92530,7 +92530,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92547,7 +92547,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92563,7 +92563,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92578,7 +92578,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92595,7 +92595,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92612,7 +92612,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92629,7 +92629,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92646,7 +92646,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92664,7 +92664,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92681,7 +92681,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92698,7 +92698,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92715,7 +92715,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92733,7 +92733,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92753,7 +92753,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92769,7 +92769,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, qmax) {
@@ -92782,7 +92782,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cm) {
@@ -92795,7 +92795,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -92811,7 +92811,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, strided_cn) {
@@ -92825,7 +92825,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_eq_8_strided_a) {
@@ -92839,7 +92839,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_1X4C8__WASMSIMD, k_eq_8_subtile) {
@@ -92855,7 +92855,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -92872,7 +92872,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92888,7 +92888,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92903,7 +92903,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92919,7 +92919,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92937,7 +92937,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -92954,7 +92954,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92970,7 +92970,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -92988,7 +92988,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93005,7 +93005,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93021,7 +93021,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93039,7 +93039,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93057,7 +93057,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93075,7 +93075,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93093,7 +93093,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93112,7 +93112,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93130,7 +93130,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93148,7 +93148,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93166,7 +93166,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93185,7 +93185,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93206,7 +93206,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93223,7 +93223,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -93239,7 +93239,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, strided_cn) {
@@ -93253,7 +93253,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_eq_8_strided_a) {
@@ -93267,7 +93267,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_2X4C8__WASMSIMD, k_eq_8_subtile) {
@@ -93283,7 +93283,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93300,7 +93300,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93316,7 +93316,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93331,7 +93331,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93347,7 +93347,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93365,7 +93365,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93382,7 +93382,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93398,7 +93398,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93416,7 +93416,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93433,7 +93433,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93449,7 +93449,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93467,7 +93467,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93485,7 +93485,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93503,7 +93503,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93521,7 +93521,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93540,7 +93540,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93558,7 +93558,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93576,7 +93576,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93594,7 +93594,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93613,7 +93613,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93634,7 +93634,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93651,7 +93651,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -93667,7 +93667,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, strided_cn) {
@@ -93681,7 +93681,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_eq_8_strided_a) {
@@ -93695,7 +93695,7 @@
       .n(4)
       .k(8)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_GEMM_XW_MINMAX_GEMMLOWP_3X4C8__WASMSIMD, k_eq_8_subtile) {
@@ -93711,7 +93711,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93728,7 +93728,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93744,7 +93744,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93759,7 +93759,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93775,7 +93775,7 @@
         .n(4)
         .k(k)
         .a_stride(11)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93793,7 +93793,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93810,7 +93810,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93826,7 +93826,7 @@
         .n(4)
         .k(k)
         .a_stride(19)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93844,7 +93844,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93861,7 +93861,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93877,7 +93877,7 @@
         .n(4)
         .k(k)
         .a_stride(83)
-        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -93895,7 +93895,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93913,7 +93913,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93931,7 +93931,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93949,7 +93949,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -93968,7 +93968,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -93986,7 +93986,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94004,7 +94004,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94022,7 +94022,7 @@
           .n(n)
           .k(k)
           .a_stride(43)
-          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94041,7 +94041,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -94062,7 +94062,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -94079,7 +94079,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -94093,7 +94093,7 @@
     .m(1)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cn) {
@@ -94106,7 +94106,7 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_strided_a) {
@@ -94119,7 +94119,7 @@
     .n(2)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_subtile) {
@@ -94134,7 +94134,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94150,7 +94150,7 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94165,7 +94165,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94179,7 +94179,7 @@
       .m(1)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94194,7 +94194,7 @@
       .n(2)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94211,7 +94211,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94228,7 +94228,7 @@
         .m(1)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94245,7 +94245,7 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94262,7 +94262,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94280,7 +94280,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94297,7 +94297,7 @@
         .m(1)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94314,7 +94314,7 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94331,7 +94331,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94349,7 +94349,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94369,7 +94369,7 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94385,7 +94385,7 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, qmax) {
@@ -94398,7 +94398,7 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cm) {
@@ -94411,7 +94411,7 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -94424,7 +94424,7 @@
     .m(2)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cn) {
@@ -94437,7 +94437,7 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_strided_a) {
@@ -94450,7 +94450,7 @@
     .n(2)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_subtile) {
@@ -94465,7 +94465,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94481,7 +94481,7 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94496,7 +94496,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94510,7 +94510,7 @@
       .m(2)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94525,7 +94525,7 @@
       .n(2)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94542,7 +94542,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94559,7 +94559,7 @@
         .m(2)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94576,7 +94576,7 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94593,7 +94593,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94611,7 +94611,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94628,7 +94628,7 @@
         .m(2)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94645,7 +94645,7 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94662,7 +94662,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94680,7 +94680,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94700,7 +94700,7 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94716,7 +94716,7 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, qmax) {
@@ -94729,7 +94729,7 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cm) {
@@ -94742,7 +94742,7 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -94755,7 +94755,7 @@
     .m(3)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cn) {
@@ -94768,7 +94768,7 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_strided_a) {
@@ -94781,7 +94781,7 @@
     .n(2)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_subtile) {
@@ -94796,7 +94796,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94812,7 +94812,7 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94827,7 +94827,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94841,7 +94841,7 @@
       .m(3)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94856,7 +94856,7 @@
       .n(2)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -94873,7 +94873,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94890,7 +94890,7 @@
         .m(3)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94907,7 +94907,7 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94924,7 +94924,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94942,7 +94942,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -94959,7 +94959,7 @@
         .m(3)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94976,7 +94976,7 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -94993,7 +94993,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95011,7 +95011,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95031,7 +95031,7 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95047,7 +95047,7 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, qmax) {
@@ -95060,7 +95060,7 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cm) {
@@ -95073,7 +95073,7 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -95086,7 +95086,7 @@
     .m(4)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cn) {
@@ -95099,7 +95099,7 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_strided_a) {
@@ -95112,7 +95112,7 @@
     .n(2)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_subtile) {
@@ -95127,7 +95127,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95143,7 +95143,7 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95158,7 +95158,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95172,7 +95172,7 @@
       .m(4)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95187,7 +95187,7 @@
       .n(2)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95204,7 +95204,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95221,7 +95221,7 @@
         .m(4)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95238,7 +95238,7 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95255,7 +95255,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95273,7 +95273,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95290,7 +95290,7 @@
         .m(4)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95307,7 +95307,7 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95324,7 +95324,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95342,7 +95342,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95362,7 +95362,7 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95378,7 +95378,7 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, qmax) {
@@ -95391,7 +95391,7 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cm) {
@@ -95404,7 +95404,7 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -95417,7 +95417,7 @@
     .m(1)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cn) {
@@ -95430,7 +95430,7 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_strided_a) {
@@ -95443,7 +95443,7 @@
     .n(4)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_subtile) {
@@ -95458,7 +95458,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95474,7 +95474,7 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95489,7 +95489,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95503,7 +95503,7 @@
       .m(1)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95518,7 +95518,7 @@
       .n(4)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95535,7 +95535,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95552,7 +95552,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95569,7 +95569,7 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95586,7 +95586,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95604,7 +95604,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95621,7 +95621,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95638,7 +95638,7 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95655,7 +95655,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95673,7 +95673,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95693,7 +95693,7 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95709,7 +95709,7 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, qmax) {
@@ -95722,7 +95722,7 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cm) {
@@ -95735,7 +95735,7 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -95748,7 +95748,7 @@
     .m(2)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cn) {
@@ -95761,7 +95761,7 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_strided_a) {
@@ -95774,7 +95774,7 @@
     .n(4)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_subtile) {
@@ -95789,7 +95789,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95805,7 +95805,7 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95820,7 +95820,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95834,7 +95834,7 @@
       .m(2)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95849,7 +95849,7 @@
       .n(4)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -95866,7 +95866,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95883,7 +95883,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95900,7 +95900,7 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95917,7 +95917,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95935,7 +95935,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -95952,7 +95952,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95969,7 +95969,7 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -95986,7 +95986,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96004,7 +96004,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96024,7 +96024,7 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96040,7 +96040,7 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, qmax) {
@@ -96053,7 +96053,7 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cm) {
@@ -96066,7 +96066,7 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -96079,7 +96079,7 @@
     .m(3)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cn) {
@@ -96092,7 +96092,7 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_strided_a) {
@@ -96105,7 +96105,7 @@
     .n(4)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_subtile) {
@@ -96120,7 +96120,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96136,7 +96136,7 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -96151,7 +96151,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -96165,7 +96165,7 @@
       .m(3)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -96180,7 +96180,7 @@
       .n(4)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -96197,7 +96197,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96214,7 +96214,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96231,7 +96231,7 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96248,7 +96248,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96266,7 +96266,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96283,7 +96283,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96300,7 +96300,7 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96317,7 +96317,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96335,7 +96335,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96355,7 +96355,7 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96371,7 +96371,7 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, qmax) {
@@ -96384,7 +96384,7 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cm) {
@@ -96397,7 +96397,7 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -96410,7 +96410,7 @@
     .m(4)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cn) {
@@ -96423,7 +96423,7 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_strided_a) {
@@ -96436,7 +96436,7 @@
     .n(4)
     .k(1)
     .a_stride(3)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_subtile) {
@@ -96451,7 +96451,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96467,7 +96467,7 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -96482,7 +96482,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -96496,7 +96496,7 @@
       .m(4)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -96511,7 +96511,7 @@
       .n(4)
       .k(k)
       .a_stride(11)
-      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -96528,7 +96528,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96545,7 +96545,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96562,7 +96562,7 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96579,7 +96579,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96597,7 +96597,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96614,7 +96614,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96631,7 +96631,7 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96648,7 +96648,7 @@
         .n(n)
         .k(k)
         .a_stride(7)
-        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -96666,7 +96666,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96686,7 +96686,7 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -96702,7 +96702,7 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, qmax) {
@@ -96715,7 +96715,7 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_GEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cm) {
@@ -96728,5 +96728,5 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
diff --git a/test/qs8-gemm-minmax-gemmlowp.yaml b/test/qs8-gemm-minmax-gemmlowp.yaml
index a626d3d..6af3fd4 100644
--- a/test/qs8-gemm-minmax-gemmlowp.yaml
+++ b/test/qs8-gemm-minmax-gemmlowp.yaml
@@ -4,647 +4,647 @@
 # LICENSE file in the root directory of this source tree.
 
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 4
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 4
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
diff --git a/test/qs8-igemm-minmax-fp32.cc b/test/qs8-igemm-minmax-fp32.cc
new file mode 100644
index 0000000..cbfbf1a
--- /dev/null
+++ b/test/qs8-igemm-minmax-fp32.cc
@@ -0,0 +1,1426 @@
+// Copyright (c) Facebook, Inc. and its affiliates.
+// All rights reserved.
+//
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+//
+// Auto-generated file. Do not edit!
+//   Specification: test/qs8-igemm-minmax-fp32.yaml
+//   Generator: tools/generate-gemm-test.py
+
+
+#include <gtest/gtest.h>
+
+#include <xnnpack/common.h>
+#include <xnnpack/isa-checks.h>
+
+#include <xnnpack/gemm.h>
+#include <xnnpack/igemm.h>
+#include <xnnpack/ppmm.h>
+#include "gemm-microkernel-tester.h"
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .cn_stride(11)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 1; m++) {
+      for (uint32_t n = 1; n <= 8; n++) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(8)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_m) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 1; m++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(m)
+        .n(8)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_eq_8_subtile_n) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 1; n <= 8; n++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(n)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_lt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, k_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(8)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 1; m++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(n)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 1; m++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, small_kernel_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .ks(3)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_gt_8_small_kernel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(8)
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, n_div_8_small_kernel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(8)
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      for (uint32_t m = 1; m <= 1; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(1)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .cm_stride(11)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, a_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      GemmMicrokernelTester()
+        .mr(1)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(1)
+        .n(8)
+        .k(k)
+        .ks(3)
+        .a_offset(43)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 1; mz++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(1)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(1)
+          .n(8)
+          .k(k)
+          .ks(3)
+          .a_offset(43)
+          .zero_index(mz)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .qmin(128)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .qmax(128)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_1X8C8__AVX2, strided_cm) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(1)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(1)
+      .n(8)
+      .k(8)
+      .cm_stride(11)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .cn_stride(11)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 2; m++) {
+      for (uint32_t n = 1; n <= 8; n++) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(8)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_m) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 2; m++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(m)
+        .n(8)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_eq_8_subtile_n) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 1; n <= 8; n++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(n)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_lt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, k_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(8)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 2; m++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(n)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 2; m++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, small_kernel_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .ks(3)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_gt_8_small_kernel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(8)
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, n_div_8_small_kernel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(8)
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      for (uint32_t m = 1; m <= 2; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(2)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .cm_stride(11)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, a_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      GemmMicrokernelTester()
+        .mr(2)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(2)
+        .n(8)
+        .k(k)
+        .ks(3)
+        .a_offset(83)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 2; mz++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(2)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(2)
+          .n(8)
+          .k(k)
+          .ks(3)
+          .a_offset(83)
+          .zero_index(mz)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .qmin(128)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .qmax(128)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_2X8C8__AVX2, strided_cm) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(2)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(2)
+      .n(8)
+      .k(8)
+      .cm_stride(11)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
+
+
+#if XNN_ARCH_X86 || XNN_ARCH_X86_64
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .cn_stride(11)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 3; m++) {
+      for (uint32_t n = 1; n <= 8; n++) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(m)
+          .n(n)
+          .k(8)
+          .iterations(1)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_m) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t m = 1; m <= 3; m++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(m)
+        .n(8)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_eq_8_subtile_n) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 1; n <= 8; n++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(n)
+        .k(8)
+        .iterations(1)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_lt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k < 8; k++) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 9; k < 16; k++) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, k_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 16; k <= 80; k += 8) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(8)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 3; m++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(8)
+          .k(k)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_strided_cn) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(n)
+          .k(k)
+          .cn_stride(11)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        for (uint32_t m = 1; m <= 3; m++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .ks(3)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, small_kernel_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .ks(3)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_gt_8_small_kernel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 9; n < 16; n++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(8)
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, n_div_8_small_kernel) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t n = 16; n <= 24; n += 8) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(8)
+          .k(k)
+          .ks(3)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm_subtile) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      for (uint32_t m = 1; m <= 3; m++) {
+        for (uint32_t n = 1; n <= 8; n++) {
+          GemmMicrokernelTester()
+            .mr(3)
+            .nr(8)
+            .kr(8)
+            .sr(1)
+            .m(m)
+            .n(n)
+            .k(k)
+            .cm_stride(11)
+            .iterations(1)
+            .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+        }
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, a_offset) {
+    TEST_REQUIRES_X86_AVX2;
+    for (size_t k = 1; k <= 40; k += 9) {
+      GemmMicrokernelTester()
+        .mr(3)
+        .nr(8)
+        .kr(8)
+        .sr(1)
+        .m(3)
+        .n(8)
+        .k(k)
+        .ks(3)
+        .a_offset(127)
+        .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, zero) {
+    TEST_REQUIRES_X86_AVX2;
+    for (uint32_t mz = 0; mz < 3; mz++) {
+      for (size_t k = 1; k <= 40; k += 9) {
+        GemmMicrokernelTester()
+          .mr(3)
+          .nr(8)
+          .kr(8)
+          .sr(1)
+          .m(3)
+          .n(8)
+          .k(k)
+          .ks(3)
+          .a_offset(127)
+          .zero_index(mz)
+          .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+      }
+    }
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmin) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .qmin(128)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, qmax) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .qmax(128)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+
+  TEST(QS8_IGEMM_MINMAX_FP32_3X8C8__AVX2, strided_cm) {
+    TEST_REQUIRES_X86_AVX2;
+    GemmMicrokernelTester()
+      .mr(3)
+      .nr(8)
+      .kr(8)
+      .sr(1)
+      .m(3)
+      .n(8)
+      .k(8)
+      .cm_stride(11)
+      .Test(xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_fp32_avx2_params, xnn_init_qs8_requantization_fp32_params, xnn_qs8_requantize_fp32);
+  }
+#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
diff --git a/test/qs8-igemm-minmax-fp32.yaml b/test/qs8-igemm-minmax-fp32.yaml
new file mode 100644
index 0000000..1aa74ad
--- /dev/null
+++ b/test/qs8-igemm-minmax-fp32.yaml
@@ -0,0 +1,14 @@
+# Copyright 2021 Google LLC
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+  k-block: 8
+- name: xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2
+  init: xnn_init_qs8_conv_minmax_fp32_avx2_params
+  k-block: 8
diff --git a/test/qs8-igemm-minmax-gemmlowp.cc b/test/qs8-igemm-minmax-gemmlowp.cc
index b633849..645e026 100644
--- a/test/qs8-igemm-minmax-gemmlowp.cc
+++ b/test/qs8-igemm-minmax-gemmlowp.cc
@@ -33,7 +33,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
@@ -47,7 +47,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
@@ -63,7 +63,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80,7 +80,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -96,7 +96,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -111,7 +111,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -129,7 +129,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -146,7 +146,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -164,7 +164,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -181,7 +181,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -199,7 +199,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -217,7 +217,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -235,7 +235,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -254,7 +254,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -272,7 +272,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -290,7 +290,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -309,7 +309,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -327,7 +327,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -346,7 +346,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -365,7 +365,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -383,7 +383,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -403,7 +403,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -422,7 +422,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -441,7 +441,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -457,7 +457,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
@@ -471,7 +471,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
@@ -485,7 +485,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -501,7 +501,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
@@ -515,7 +515,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
@@ -531,7 +531,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -548,7 +548,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -564,7 +564,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -579,7 +579,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -597,7 +597,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -614,7 +614,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -632,7 +632,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -649,7 +649,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -667,7 +667,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -685,7 +685,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -703,7 +703,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -722,7 +722,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -740,7 +740,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -758,7 +758,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -777,7 +777,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -795,7 +795,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -814,7 +814,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -833,7 +833,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -851,7 +851,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -871,7 +871,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -890,7 +890,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -909,7 +909,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -925,7 +925,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
@@ -939,7 +939,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
@@ -953,7 +953,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -969,7 +969,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
@@ -983,7 +983,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -999,7 +999,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1016,7 +1016,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1032,7 +1032,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1047,7 +1047,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1065,7 +1065,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1082,7 +1082,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1100,7 +1100,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1117,7 +1117,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1135,7 +1135,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1153,7 +1153,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1171,7 +1171,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1190,7 +1190,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1208,7 +1208,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1226,7 +1226,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1245,7 +1245,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1263,7 +1263,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1282,7 +1282,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1301,7 +1301,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1319,7 +1319,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1339,7 +1339,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1358,7 +1358,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1377,7 +1377,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1393,7 +1393,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
@@ -1407,7 +1407,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
@@ -1421,7 +1421,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -1437,7 +1437,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
@@ -1451,7 +1451,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
@@ -1467,7 +1467,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1484,7 +1484,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1500,7 +1500,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1515,7 +1515,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1533,7 +1533,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1550,7 +1550,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1568,7 +1568,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1585,7 +1585,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1603,7 +1603,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1621,7 +1621,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1639,7 +1639,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1658,7 +1658,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1676,7 +1676,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1694,7 +1694,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1713,7 +1713,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1731,7 +1731,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1750,7 +1750,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1769,7 +1769,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1787,7 +1787,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1807,7 +1807,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -1826,7 +1826,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1845,7 +1845,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1861,7 +1861,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
@@ -1875,7 +1875,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
@@ -1889,7 +1889,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -1905,7 +1905,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
@@ -1919,7 +1919,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
@@ -1935,7 +1935,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -1952,7 +1952,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1968,7 +1968,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -1983,7 +1983,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2001,7 +2001,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2018,7 +2018,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2036,7 +2036,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2053,7 +2053,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2071,7 +2071,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2089,7 +2089,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2107,7 +2107,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2126,7 +2126,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2144,7 +2144,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2162,7 +2162,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2181,7 +2181,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2199,7 +2199,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2218,7 +2218,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2237,7 +2237,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2255,7 +2255,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2275,7 +2275,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2294,7 +2294,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2313,7 +2313,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2329,7 +2329,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
@@ -2343,7 +2343,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
@@ -2357,7 +2357,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -2373,7 +2373,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
@@ -2387,7 +2387,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
@@ -2403,7 +2403,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2420,7 +2420,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2436,7 +2436,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2451,7 +2451,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2469,7 +2469,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2486,7 +2486,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2504,7 +2504,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2521,7 +2521,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2539,7 +2539,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2557,7 +2557,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2575,7 +2575,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2594,7 +2594,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2612,7 +2612,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2630,7 +2630,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2649,7 +2649,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2667,7 +2667,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2686,7 +2686,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2705,7 +2705,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2723,7 +2723,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2743,7 +2743,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2762,7 +2762,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2781,7 +2781,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2797,7 +2797,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
@@ -2811,7 +2811,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
@@ -2825,7 +2825,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -2841,7 +2841,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cn) {
@@ -2855,7 +2855,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, k_eq_16_subtile) {
@@ -2871,7 +2871,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -2888,7 +2888,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2904,7 +2904,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2919,7 +2919,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2937,7 +2937,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2954,7 +2954,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -2972,7 +2972,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -2989,7 +2989,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3007,7 +3007,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3025,7 +3025,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3043,7 +3043,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3062,7 +3062,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3080,7 +3080,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3098,7 +3098,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3117,7 +3117,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3135,7 +3135,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3154,7 +3154,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3173,7 +3173,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3191,7 +3191,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3211,7 +3211,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3230,7 +3230,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3249,7 +3249,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3265,7 +3265,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, qmax) {
@@ -3279,7 +3279,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_CORTEX_A53, strided_cm) {
@@ -3293,7 +3293,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -3309,7 +3309,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cn) {
@@ -3323,7 +3323,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, k_eq_16_subtile) {
@@ -3339,7 +3339,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3356,7 +3356,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3372,7 +3372,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3387,7 +3387,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3405,7 +3405,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3422,7 +3422,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3440,7 +3440,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3457,7 +3457,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3475,7 +3475,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3493,7 +3493,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3511,7 +3511,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3530,7 +3530,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3548,7 +3548,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3566,7 +3566,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3585,7 +3585,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3603,7 +3603,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3622,7 +3622,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3641,7 +3641,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3659,7 +3659,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3679,7 +3679,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3698,7 +3698,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3717,7 +3717,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3733,7 +3733,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, qmax) {
@@ -3747,7 +3747,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM_CORTEX_A53, strided_cm) {
@@ -3761,7 +3761,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -3777,7 +3777,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cn) {
@@ -3791,7 +3791,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -3807,7 +3807,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3824,7 +3824,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3840,7 +3840,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3855,7 +3855,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3873,7 +3873,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3890,7 +3890,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3908,7 +3908,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3925,7 +3925,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -3943,7 +3943,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -3961,7 +3961,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3979,7 +3979,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -3998,7 +3998,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4016,7 +4016,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4034,7 +4034,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4053,7 +4053,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4071,7 +4071,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4090,7 +4090,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4109,7 +4109,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4127,7 +4127,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4147,7 +4147,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4166,7 +4166,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4185,7 +4185,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4201,7 +4201,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, qmax) {
@@ -4215,7 +4215,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL, strided_cm) {
@@ -4229,7 +4229,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -4245,7 +4245,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cn) {
@@ -4259,7 +4259,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, k_eq_16_subtile) {
@@ -4275,7 +4275,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4292,7 +4292,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4308,7 +4308,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4323,7 +4323,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4341,7 +4341,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4358,7 +4358,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4376,7 +4376,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4393,7 +4393,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4411,7 +4411,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4429,7 +4429,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4447,7 +4447,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4466,7 +4466,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4484,7 +4484,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4502,7 +4502,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4521,7 +4521,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4539,7 +4539,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4558,7 +4558,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4577,7 +4577,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4595,7 +4595,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4615,7 +4615,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4634,7 +4634,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4653,7 +4653,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4669,7 +4669,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, qmax) {
@@ -4683,7 +4683,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AARCH64_NEON_MLAL_PADAL_PRFM, strided_cm) {
@@ -4697,7 +4697,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -4713,7 +4713,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cn) {
@@ -4727,7 +4727,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -4743,7 +4743,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4760,7 +4760,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4776,7 +4776,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4791,7 +4791,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4809,7 +4809,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4826,7 +4826,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4844,7 +4844,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4861,7 +4861,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -4879,7 +4879,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4897,7 +4897,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4915,7 +4915,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4934,7 +4934,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -4952,7 +4952,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4970,7 +4970,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -4989,7 +4989,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5007,7 +5007,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5026,7 +5026,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5045,7 +5045,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5063,7 +5063,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5083,7 +5083,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5102,7 +5102,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5121,7 +5121,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5137,7 +5137,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, qmax) {
@@ -5151,7 +5151,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__AARCH64_NEON_MLAL_PADAL, strided_cm) {
@@ -5165,7 +5165,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -5181,7 +5181,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cn) {
@@ -5195,7 +5195,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -5211,7 +5211,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5228,7 +5228,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5244,7 +5244,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5259,7 +5259,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5277,7 +5277,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5294,7 +5294,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5312,7 +5312,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5329,7 +5329,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5347,7 +5347,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5365,7 +5365,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5383,7 +5383,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5402,7 +5402,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5420,7 +5420,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5438,7 +5438,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5457,7 +5457,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5475,7 +5475,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5494,7 +5494,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5513,7 +5513,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5531,7 +5531,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5551,7 +5551,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5570,7 +5570,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5589,7 +5589,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5605,7 +5605,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, qmax) {
@@ -5619,7 +5619,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE, strided_cm) {
@@ -5633,7 +5633,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -5649,7 +5649,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cn) {
@@ -5663,7 +5663,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -5679,7 +5679,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5696,7 +5696,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5712,7 +5712,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5727,7 +5727,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5745,7 +5745,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5762,7 +5762,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5780,7 +5780,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5797,7 +5797,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5815,7 +5815,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5833,7 +5833,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5851,7 +5851,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5870,7 +5870,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5888,7 +5888,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5906,7 +5906,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5925,7 +5925,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5943,7 +5943,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -5962,7 +5962,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -5981,7 +5981,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -5999,7 +5999,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6019,7 +6019,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6038,7 +6038,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6057,7 +6057,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6073,7 +6073,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, qmax) {
@@ -6087,7 +6087,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE, strided_cm) {
@@ -6101,7 +6101,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -6117,7 +6117,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cn) {
@@ -6131,7 +6131,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -6147,7 +6147,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6164,7 +6164,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6180,7 +6180,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6195,7 +6195,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6213,7 +6213,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6230,7 +6230,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6248,7 +6248,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6265,7 +6265,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6283,7 +6283,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6301,7 +6301,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6319,7 +6319,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6338,7 +6338,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6356,7 +6356,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6374,7 +6374,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6393,7 +6393,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6411,7 +6411,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6430,7 +6430,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6449,7 +6449,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6467,7 +6467,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6487,7 +6487,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6506,7 +6506,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6525,7 +6525,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6541,7 +6541,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, qmax) {
@@ -6555,7 +6555,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE, strided_cm) {
@@ -6569,7 +6569,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -6585,7 +6585,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cn) {
@@ -6599,7 +6599,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -6615,7 +6615,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6632,7 +6632,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6648,7 +6648,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6663,7 +6663,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6681,7 +6681,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6698,7 +6698,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6716,7 +6716,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6733,7 +6733,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6751,7 +6751,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6769,7 +6769,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6787,7 +6787,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6806,7 +6806,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6824,7 +6824,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6842,7 +6842,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6861,7 +6861,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6879,7 +6879,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6898,7 +6898,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6917,7 +6917,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6935,7 +6935,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -6955,7 +6955,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -6974,7 +6974,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -6993,7 +6993,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7009,7 +7009,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, qmax) {
@@ -7023,7 +7023,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE, strided_cm) {
@@ -7037,7 +7037,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -7053,7 +7053,7 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cn) {
@@ -7067,7 +7067,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -7083,7 +7083,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7100,7 +7100,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7116,7 +7116,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7131,7 +7131,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7149,7 +7149,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7166,7 +7166,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7184,7 +7184,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7201,7 +7201,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7219,7 +7219,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7237,7 +7237,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7255,7 +7255,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7274,7 +7274,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7292,7 +7292,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7310,7 +7310,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7329,7 +7329,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7347,7 +7347,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7366,7 +7366,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7385,7 +7385,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7403,7 +7403,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7423,7 +7423,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7442,7 +7442,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7461,7 +7461,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7477,7 +7477,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, qmax) {
@@ -7491,7 +7491,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE, strided_cm) {
@@ -7505,7 +7505,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -7521,7 +7521,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cn) {
@@ -7535,7 +7535,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -7551,7 +7551,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7568,7 +7568,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7584,7 +7584,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7599,7 +7599,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7617,7 +7617,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7634,7 +7634,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7652,7 +7652,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7669,7 +7669,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7687,7 +7687,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7705,7 +7705,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7723,7 +7723,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7742,7 +7742,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7760,7 +7760,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7778,7 +7778,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7797,7 +7797,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7815,7 +7815,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7834,7 +7834,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7853,7 +7853,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7871,7 +7871,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7891,7 +7891,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -7910,7 +7910,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -7929,7 +7929,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -7945,7 +7945,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, qmax) {
@@ -7959,7 +7959,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE, strided_cm) {
@@ -7973,7 +7973,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -7989,7 +7989,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cn) {
@@ -8003,7 +8003,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -8019,7 +8019,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8036,7 +8036,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8052,7 +8052,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8067,7 +8067,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8085,7 +8085,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8102,7 +8102,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8120,7 +8120,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8137,7 +8137,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8155,7 +8155,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8173,7 +8173,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8191,7 +8191,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8210,7 +8210,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8228,7 +8228,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8246,7 +8246,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8265,7 +8265,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8283,7 +8283,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8302,7 +8302,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8321,7 +8321,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8339,7 +8339,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8359,7 +8359,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8378,7 +8378,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8397,7 +8397,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8413,7 +8413,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, qmax) {
@@ -8427,7 +8427,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE, strided_cm) {
@@ -8441,7 +8441,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -8457,7 +8457,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cn) {
@@ -8471,7 +8471,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -8487,7 +8487,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8504,7 +8504,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8520,7 +8520,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8535,7 +8535,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8553,7 +8553,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8570,7 +8570,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8588,7 +8588,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8605,7 +8605,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8623,7 +8623,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8641,7 +8641,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8659,7 +8659,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8678,7 +8678,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8696,7 +8696,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8714,7 +8714,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8733,7 +8733,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8751,7 +8751,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8770,7 +8770,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8789,7 +8789,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8807,7 +8807,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8827,7 +8827,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -8846,7 +8846,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8865,7 +8865,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8881,7 +8881,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, qmax) {
@@ -8895,7 +8895,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE, strided_cm) {
@@ -8909,7 +8909,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -8925,7 +8925,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cn) {
@@ -8939,7 +8939,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -8955,7 +8955,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -8972,7 +8972,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -8988,7 +8988,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9003,7 +9003,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9021,7 +9021,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9038,7 +9038,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9056,7 +9056,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9073,7 +9073,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9091,7 +9091,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9109,7 +9109,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9127,7 +9127,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9146,7 +9146,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9164,7 +9164,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9182,7 +9182,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9201,7 +9201,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9219,7 +9219,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9238,7 +9238,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9257,7 +9257,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9275,7 +9275,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9295,7 +9295,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9314,7 +9314,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9333,7 +9333,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9349,7 +9349,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, qmax) {
@@ -9363,7 +9363,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE, strided_cm) {
@@ -9377,7 +9377,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -9393,7 +9393,7 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cn) {
@@ -9407,7 +9407,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
@@ -9423,7 +9423,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9440,7 +9440,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9456,7 +9456,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9471,7 +9471,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9489,7 +9489,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9506,7 +9506,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9524,7 +9524,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9541,7 +9541,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9559,7 +9559,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9577,7 +9577,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9595,7 +9595,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9614,7 +9614,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9632,7 +9632,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9650,7 +9650,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9669,7 +9669,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9687,7 +9687,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9706,7 +9706,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9725,7 +9725,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9743,7 +9743,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9763,7 +9763,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9782,7 +9782,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9801,7 +9801,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9817,7 +9817,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, qmax) {
@@ -9831,7 +9831,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE, strided_cm) {
@@ -9845,7 +9845,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -9861,7 +9861,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -9875,7 +9875,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -9891,7 +9891,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -9908,7 +9908,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9924,7 +9924,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9939,7 +9939,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9957,7 +9957,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -9974,7 +9974,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -9992,7 +9992,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10009,7 +10009,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10027,7 +10027,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10045,7 +10045,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10063,7 +10063,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10082,7 +10082,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10100,7 +10100,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10118,7 +10118,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10137,7 +10137,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10155,7 +10155,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10174,7 +10174,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10193,7 +10193,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10211,7 +10211,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10231,7 +10231,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10250,7 +10250,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10269,7 +10269,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10285,7 +10285,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -10299,7 +10299,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -10313,7 +10313,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -10329,7 +10329,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -10343,7 +10343,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -10359,7 +10359,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10376,7 +10376,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10392,7 +10392,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10407,7 +10407,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10425,7 +10425,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10442,7 +10442,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10460,7 +10460,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10477,7 +10477,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10495,7 +10495,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10513,7 +10513,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10531,7 +10531,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10550,7 +10550,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10568,7 +10568,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10586,7 +10586,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10605,7 +10605,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10623,7 +10623,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10642,7 +10642,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10661,7 +10661,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10679,7 +10679,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10699,7 +10699,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10718,7 +10718,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10737,7 +10737,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10753,7 +10753,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -10767,7 +10767,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -10781,7 +10781,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -10797,7 +10797,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -10811,7 +10811,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -10827,7 +10827,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10844,7 +10844,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10860,7 +10860,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10875,7 +10875,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10893,7 +10893,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10910,7 +10910,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10928,7 +10928,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10945,7 +10945,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -10963,7 +10963,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -10981,7 +10981,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -10999,7 +10999,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11018,7 +11018,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11036,7 +11036,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11054,7 +11054,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11073,7 +11073,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11091,7 +11091,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11110,7 +11110,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11129,7 +11129,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11147,7 +11147,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11167,7 +11167,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11186,7 +11186,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11205,7 +11205,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11221,7 +11221,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -11235,7 +11235,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -11249,7 +11249,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -11265,7 +11265,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -11279,7 +11279,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -11295,7 +11295,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11312,7 +11312,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11328,7 +11328,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11343,7 +11343,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11361,7 +11361,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11378,7 +11378,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11396,7 +11396,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11413,7 +11413,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11431,7 +11431,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11449,7 +11449,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11467,7 +11467,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11486,7 +11486,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11504,7 +11504,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11522,7 +11522,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11541,7 +11541,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11559,7 +11559,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11578,7 +11578,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11597,7 +11597,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11615,7 +11615,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11635,7 +11635,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11654,7 +11654,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11673,7 +11673,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11689,7 +11689,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -11703,7 +11703,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -11717,7 +11717,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -11733,7 +11733,7 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -11747,7 +11747,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -11763,7 +11763,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11780,7 +11780,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11796,7 +11796,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11811,7 +11811,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11829,7 +11829,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11846,7 +11846,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11864,7 +11864,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11881,7 +11881,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -11899,7 +11899,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11917,7 +11917,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11935,7 +11935,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11954,7 +11954,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -11972,7 +11972,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -11990,7 +11990,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12009,7 +12009,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12027,7 +12027,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12046,7 +12046,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12065,7 +12065,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12083,7 +12083,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12103,7 +12103,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12122,7 +12122,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12141,7 +12141,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12157,7 +12157,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, qmax) {
@@ -12171,7 +12171,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -12185,7 +12185,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -12201,7 +12201,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -12215,7 +12215,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -12231,7 +12231,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12248,7 +12248,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12264,7 +12264,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12279,7 +12279,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12297,7 +12297,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12314,7 +12314,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12332,7 +12332,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12349,7 +12349,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12367,7 +12367,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12385,7 +12385,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12403,7 +12403,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12422,7 +12422,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12440,7 +12440,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12458,7 +12458,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12477,7 +12477,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12495,7 +12495,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12514,7 +12514,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12533,7 +12533,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12551,7 +12551,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12571,7 +12571,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12590,7 +12590,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12609,7 +12609,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12625,7 +12625,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -12639,7 +12639,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -12653,7 +12653,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -12669,7 +12669,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -12683,7 +12683,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -12699,7 +12699,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12716,7 +12716,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12732,7 +12732,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12747,7 +12747,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12765,7 +12765,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12782,7 +12782,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12800,7 +12800,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12817,7 +12817,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12835,7 +12835,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12853,7 +12853,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12871,7 +12871,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12890,7 +12890,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12908,7 +12908,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12926,7 +12926,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -12945,7 +12945,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -12963,7 +12963,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -12982,7 +12982,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13001,7 +13001,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13019,7 +13019,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13039,7 +13039,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13058,7 +13058,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13077,7 +13077,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13093,7 +13093,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -13107,7 +13107,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -13121,7 +13121,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -13137,7 +13137,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -13151,7 +13151,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -13167,7 +13167,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13184,7 +13184,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13200,7 +13200,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13215,7 +13215,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13233,7 +13233,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13250,7 +13250,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13268,7 +13268,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13285,7 +13285,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13303,7 +13303,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13321,7 +13321,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13339,7 +13339,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13358,7 +13358,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13376,7 +13376,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13394,7 +13394,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13413,7 +13413,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13431,7 +13431,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13450,7 +13450,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13469,7 +13469,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13487,7 +13487,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13507,7 +13507,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13526,7 +13526,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13545,7 +13545,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13561,7 +13561,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -13575,7 +13575,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -13589,7 +13589,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -13605,7 +13605,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -13619,7 +13619,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -13635,7 +13635,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13652,7 +13652,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13668,7 +13668,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13683,7 +13683,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13701,7 +13701,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13718,7 +13718,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13736,7 +13736,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13753,7 +13753,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13771,7 +13771,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13789,7 +13789,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13807,7 +13807,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13826,7 +13826,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13844,7 +13844,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13862,7 +13862,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13881,7 +13881,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13899,7 +13899,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -13918,7 +13918,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13937,7 +13937,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13955,7 +13955,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -13975,7 +13975,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -13994,7 +13994,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14013,7 +14013,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14029,7 +14029,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -14043,7 +14043,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -14057,7 +14057,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -14073,7 +14073,7 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
@@ -14087,7 +14087,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
@@ -14103,7 +14103,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14120,7 +14120,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14136,7 +14136,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14151,7 +14151,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14169,7 +14169,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14186,7 +14186,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14204,7 +14204,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14221,7 +14221,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14239,7 +14239,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14257,7 +14257,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14275,7 +14275,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14294,7 +14294,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14312,7 +14312,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14330,7 +14330,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14349,7 +14349,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14367,7 +14367,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14386,7 +14386,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14405,7 +14405,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14423,7 +14423,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14443,7 +14443,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14462,7 +14462,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14481,7 +14481,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14497,7 +14497,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, qmax) {
@@ -14511,7 +14511,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
@@ -14525,7 +14525,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -14541,7 +14541,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cn) {
@@ -14555,7 +14555,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -14571,7 +14571,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14588,7 +14588,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14604,7 +14604,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14619,7 +14619,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14637,7 +14637,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14654,7 +14654,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14672,7 +14672,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14689,7 +14689,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14707,7 +14707,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14725,7 +14725,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14743,7 +14743,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14762,7 +14762,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14780,7 +14780,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14798,7 +14798,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14817,7 +14817,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14835,7 +14835,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14854,7 +14854,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14873,7 +14873,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14891,7 +14891,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14911,7 +14911,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -14930,7 +14930,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -14949,7 +14949,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -14965,7 +14965,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, qmax) {
@@ -14979,7 +14979,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MULL_PADAL, strided_cm) {
@@ -14993,7 +14993,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -15009,7 +15009,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cn) {
@@ -15023,7 +15023,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -15039,7 +15039,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15056,7 +15056,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15072,7 +15072,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15087,7 +15087,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15105,7 +15105,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15122,7 +15122,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15140,7 +15140,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15157,7 +15157,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15175,7 +15175,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15193,7 +15193,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15211,7 +15211,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15230,7 +15230,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15248,7 +15248,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15266,7 +15266,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15285,7 +15285,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15303,7 +15303,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15322,7 +15322,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15341,7 +15341,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15359,7 +15359,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15379,7 +15379,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15398,7 +15398,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15417,7 +15417,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15433,7 +15433,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, qmax) {
@@ -15447,7 +15447,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MULL_PADAL, strided_cm) {
@@ -15461,7 +15461,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -15477,7 +15477,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cn) {
@@ -15491,7 +15491,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -15507,7 +15507,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15524,7 +15524,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15540,7 +15540,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15555,7 +15555,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15573,7 +15573,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15590,7 +15590,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15608,7 +15608,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15625,7 +15625,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15643,7 +15643,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15661,7 +15661,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15679,7 +15679,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15698,7 +15698,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15716,7 +15716,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15734,7 +15734,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15753,7 +15753,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15771,7 +15771,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15790,7 +15790,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15809,7 +15809,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15827,7 +15827,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15847,7 +15847,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -15866,7 +15866,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -15885,7 +15885,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15901,7 +15901,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, qmax) {
@@ -15915,7 +15915,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MULL_PADAL, strided_cm) {
@@ -15929,7 +15929,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -15945,7 +15945,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cn) {
@@ -15959,7 +15959,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -15975,7 +15975,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -15992,7 +15992,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16008,7 +16008,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16023,7 +16023,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16041,7 +16041,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16058,7 +16058,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16076,7 +16076,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16093,7 +16093,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16111,7 +16111,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16129,7 +16129,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16147,7 +16147,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16166,7 +16166,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16184,7 +16184,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16202,7 +16202,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16221,7 +16221,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16239,7 +16239,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16258,7 +16258,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16277,7 +16277,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16295,7 +16295,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16315,7 +16315,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16334,7 +16334,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16353,7 +16353,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16369,7 +16369,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, qmax) {
@@ -16383,7 +16383,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MULL_PADAL, strided_cm) {
@@ -16397,7 +16397,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -16413,7 +16413,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cn) {
@@ -16427,7 +16427,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -16443,7 +16443,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16460,7 +16460,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16476,7 +16476,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16491,7 +16491,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16509,7 +16509,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16526,7 +16526,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16544,7 +16544,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16561,7 +16561,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16579,7 +16579,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16597,7 +16597,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16615,7 +16615,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16634,7 +16634,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16652,7 +16652,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16670,7 +16670,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16689,7 +16689,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16707,7 +16707,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16726,7 +16726,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16745,7 +16745,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16763,7 +16763,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16783,7 +16783,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16802,7 +16802,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16821,7 +16821,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16837,7 +16837,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, qmax) {
@@ -16851,7 +16851,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MULL_PADAL, strided_cm) {
@@ -16865,7 +16865,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -16881,7 +16881,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cn) {
@@ -16895,7 +16895,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -16911,7 +16911,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -16928,7 +16928,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16944,7 +16944,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16959,7 +16959,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -16977,7 +16977,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -16994,7 +16994,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17012,7 +17012,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17029,7 +17029,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17047,7 +17047,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17065,7 +17065,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17083,7 +17083,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17102,7 +17102,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17120,7 +17120,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17138,7 +17138,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17157,7 +17157,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17175,7 +17175,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17194,7 +17194,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17213,7 +17213,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17231,7 +17231,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17251,7 +17251,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17270,7 +17270,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17289,7 +17289,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17305,7 +17305,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, qmax) {
@@ -17319,7 +17319,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MULL_PADAL, strided_cm) {
@@ -17333,7 +17333,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -17349,7 +17349,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cn) {
@@ -17363,7 +17363,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -17379,7 +17379,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17396,7 +17396,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17412,7 +17412,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17427,7 +17427,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17445,7 +17445,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17462,7 +17462,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17480,7 +17480,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17497,7 +17497,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17515,7 +17515,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17533,7 +17533,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17551,7 +17551,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17570,7 +17570,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17588,7 +17588,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17606,7 +17606,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17625,7 +17625,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17643,7 +17643,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17662,7 +17662,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17681,7 +17681,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17699,7 +17699,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17719,7 +17719,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17738,7 +17738,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17757,7 +17757,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17773,7 +17773,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, qmax) {
@@ -17787,7 +17787,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MULL_PADAL, strided_cm) {
@@ -17801,7 +17801,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -17817,7 +17817,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cn) {
@@ -17831,7 +17831,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, k_eq_8_subtile) {
@@ -17847,7 +17847,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -17864,7 +17864,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17880,7 +17880,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17895,7 +17895,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17913,7 +17913,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17930,7 +17930,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17948,7 +17948,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -17965,7 +17965,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -17983,7 +17983,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18001,7 +18001,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18019,7 +18019,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18038,7 +18038,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18056,7 +18056,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18074,7 +18074,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18093,7 +18093,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18111,7 +18111,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18130,7 +18130,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18149,7 +18149,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18167,7 +18167,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18187,7 +18187,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18206,7 +18206,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18225,7 +18225,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18241,7 +18241,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, qmax) {
@@ -18255,7 +18255,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MULL_PADAL, strided_cm) {
@@ -18269,7 +18269,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -18285,7 +18285,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cn) {
@@ -18299,7 +18299,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -18315,7 +18315,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18332,7 +18332,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18348,7 +18348,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18363,7 +18363,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18381,7 +18381,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18398,7 +18398,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18416,7 +18416,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18433,7 +18433,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18451,7 +18451,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18469,7 +18469,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18487,7 +18487,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18506,7 +18506,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18524,7 +18524,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18542,7 +18542,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18561,7 +18561,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18579,7 +18579,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18598,7 +18598,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18617,7 +18617,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18635,7 +18635,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18655,7 +18655,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18674,7 +18674,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18693,7 +18693,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18709,7 +18709,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, qmax) {
@@ -18723,7 +18723,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__NEON_MLAL_PADAL, strided_cm) {
@@ -18737,7 +18737,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -18753,7 +18753,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cn) {
@@ -18767,7 +18767,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -18783,7 +18783,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18800,7 +18800,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18816,7 +18816,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18831,7 +18831,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18849,7 +18849,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18866,7 +18866,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18884,7 +18884,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18901,7 +18901,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -18919,7 +18919,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18937,7 +18937,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18955,7 +18955,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -18974,7 +18974,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -18992,7 +18992,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19010,7 +19010,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19029,7 +19029,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19047,7 +19047,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19066,7 +19066,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19085,7 +19085,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19103,7 +19103,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19123,7 +19123,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19142,7 +19142,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19161,7 +19161,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19177,7 +19177,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, qmax) {
@@ -19191,7 +19191,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__NEON_MLAL_PADAL, strided_cm) {
@@ -19205,7 +19205,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -19221,7 +19221,7 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cn) {
@@ -19235,7 +19235,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -19251,7 +19251,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19268,7 +19268,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19284,7 +19284,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19299,7 +19299,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19317,7 +19317,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19334,7 +19334,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19352,7 +19352,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19369,7 +19369,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19387,7 +19387,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19405,7 +19405,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19423,7 +19423,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19442,7 +19442,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19460,7 +19460,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19478,7 +19478,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19497,7 +19497,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19515,7 +19515,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19534,7 +19534,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19553,7 +19553,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19571,7 +19571,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19591,7 +19591,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19610,7 +19610,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19629,7 +19629,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19645,7 +19645,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, qmax) {
@@ -19659,7 +19659,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__NEON_MLAL_PADAL, strided_cm) {
@@ -19673,7 +19673,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -19689,7 +19689,7 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cn) {
@@ -19703,7 +19703,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -19719,7 +19719,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19736,7 +19736,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19752,7 +19752,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19767,7 +19767,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19785,7 +19785,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19802,7 +19802,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19820,7 +19820,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19837,7 +19837,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -19855,7 +19855,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19873,7 +19873,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19891,7 +19891,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19910,7 +19910,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19928,7 +19928,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19946,7 +19946,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -19965,7 +19965,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -19983,7 +19983,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20002,7 +20002,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20021,7 +20021,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20039,7 +20039,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20059,7 +20059,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20078,7 +20078,7 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20097,7 +20097,7 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20113,7 +20113,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, qmax) {
@@ -20127,7 +20127,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C8__NEON_MLAL_PADAL, strided_cm) {
@@ -20141,7 +20141,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -20157,7 +20157,7 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cn) {
@@ -20171,7 +20171,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -20187,7 +20187,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20204,7 +20204,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20220,7 +20220,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20235,7 +20235,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20253,7 +20253,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20270,7 +20270,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20288,7 +20288,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20305,7 +20305,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20323,7 +20323,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20341,7 +20341,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20359,7 +20359,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20378,7 +20378,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20396,7 +20396,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20414,7 +20414,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20433,7 +20433,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20451,7 +20451,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20470,7 +20470,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20489,7 +20489,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20507,7 +20507,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20527,7 +20527,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20546,7 +20546,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20565,7 +20565,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20581,7 +20581,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, qmax) {
@@ -20595,7 +20595,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__NEON_MLAL_PADAL, strided_cm) {
@@ -20609,7 +20609,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -20625,7 +20625,7 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cn) {
@@ -20639,7 +20639,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -20655,7 +20655,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20672,7 +20672,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20688,7 +20688,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20703,7 +20703,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20721,7 +20721,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20738,7 +20738,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20756,7 +20756,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20773,7 +20773,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20791,7 +20791,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20809,7 +20809,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20827,7 +20827,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20846,7 +20846,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20864,7 +20864,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20882,7 +20882,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20901,7 +20901,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20919,7 +20919,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -20938,7 +20938,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -20957,7 +20957,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20975,7 +20975,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -20995,7 +20995,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21014,7 +21014,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21033,7 +21033,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21049,7 +21049,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, qmax) {
@@ -21063,7 +21063,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__NEON_MLAL_PADAL, strided_cm) {
@@ -21077,7 +21077,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -21093,7 +21093,7 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cn) {
@@ -21107,7 +21107,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -21123,7 +21123,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21140,7 +21140,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21156,7 +21156,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21171,7 +21171,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21189,7 +21189,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21206,7 +21206,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21224,7 +21224,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21241,7 +21241,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21259,7 +21259,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21277,7 +21277,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21295,7 +21295,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21314,7 +21314,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21332,7 +21332,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21350,7 +21350,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21369,7 +21369,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21387,7 +21387,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21406,7 +21406,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21425,7 +21425,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21443,7 +21443,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21463,7 +21463,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21482,7 +21482,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21501,7 +21501,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21517,7 +21517,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, qmax) {
@@ -21531,7 +21531,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__NEON_MLAL_PADAL, strided_cm) {
@@ -21545,7 +21545,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -21561,7 +21561,7 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cn) {
@@ -21575,7 +21575,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -21591,7 +21591,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21608,7 +21608,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21624,7 +21624,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21639,7 +21639,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21657,7 +21657,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21674,7 +21674,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21692,7 +21692,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21709,7 +21709,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21727,7 +21727,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21745,7 +21745,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21763,7 +21763,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21782,7 +21782,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21800,7 +21800,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21818,7 +21818,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21837,7 +21837,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21855,7 +21855,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21874,7 +21874,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21893,7 +21893,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21911,7 +21911,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21931,7 +21931,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -21950,7 +21950,7 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -21969,7 +21969,7 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -21985,7 +21985,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, qmax) {
@@ -21999,7 +21999,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__NEON_MLAL_PADAL, strided_cm) {
@@ -22013,7 +22013,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -22029,7 +22029,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cn) {
@@ -22043,7 +22043,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -22059,7 +22059,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22076,7 +22076,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22092,7 +22092,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22107,7 +22107,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22125,7 +22125,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22142,7 +22142,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22160,7 +22160,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22177,7 +22177,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22195,7 +22195,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22213,7 +22213,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22231,7 +22231,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22250,7 +22250,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22268,7 +22268,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22286,7 +22286,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22305,7 +22305,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22323,7 +22323,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22342,7 +22342,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22361,7 +22361,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22379,7 +22379,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22399,7 +22399,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22418,7 +22418,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22437,7 +22437,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22453,7 +22453,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, qmax) {
@@ -22467,7 +22467,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C16__NEON_MLAL_PADAL, strided_cm) {
@@ -22481,7 +22481,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -22497,7 +22497,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cn) {
@@ -22511,7 +22511,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -22527,7 +22527,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22544,7 +22544,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22560,7 +22560,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22575,7 +22575,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22593,7 +22593,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22610,7 +22610,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22628,7 +22628,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22645,7 +22645,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22663,7 +22663,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22681,7 +22681,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22699,7 +22699,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22718,7 +22718,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22736,7 +22736,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22754,7 +22754,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22773,7 +22773,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22791,7 +22791,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22810,7 +22810,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22829,7 +22829,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22847,7 +22847,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22867,7 +22867,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -22886,7 +22886,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -22905,7 +22905,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -22921,7 +22921,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, qmax) {
@@ -22935,7 +22935,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C16__NEON_MLAL_PADAL, strided_cm) {
@@ -22949,7 +22949,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -22965,7 +22965,7 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cn) {
@@ -22979,7 +22979,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -22995,7 +22995,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23012,7 +23012,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23028,7 +23028,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23043,7 +23043,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23061,7 +23061,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23078,7 +23078,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23096,7 +23096,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23113,7 +23113,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23131,7 +23131,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23149,7 +23149,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23167,7 +23167,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23186,7 +23186,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23204,7 +23204,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23222,7 +23222,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23241,7 +23241,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23259,7 +23259,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23278,7 +23278,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23297,7 +23297,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23315,7 +23315,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23335,7 +23335,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23354,7 +23354,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23373,7 +23373,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23389,7 +23389,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, qmax) {
@@ -23403,7 +23403,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C16__NEON_MLAL_PADAL, strided_cm) {
@@ -23417,7 +23417,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -23433,7 +23433,7 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cn) {
@@ -23447,7 +23447,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -23463,7 +23463,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23480,7 +23480,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23496,7 +23496,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23511,7 +23511,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23529,7 +23529,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23546,7 +23546,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23564,7 +23564,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23581,7 +23581,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23599,7 +23599,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23617,7 +23617,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23635,7 +23635,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23654,7 +23654,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23672,7 +23672,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23690,7 +23690,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23709,7 +23709,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23727,7 +23727,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23746,7 +23746,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23765,7 +23765,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23783,7 +23783,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23803,7 +23803,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -23822,7 +23822,7 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23841,7 +23841,7 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23857,7 +23857,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, qmax) {
@@ -23871,7 +23871,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C16__NEON_MLAL_PADAL, strided_cm) {
@@ -23885,7 +23885,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -23901,7 +23901,7 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cn) {
@@ -23915,7 +23915,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -23931,7 +23931,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -23948,7 +23948,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23964,7 +23964,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23979,7 +23979,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -23997,7 +23997,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24014,7 +24014,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24032,7 +24032,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24049,7 +24049,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24067,7 +24067,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24085,7 +24085,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24103,7 +24103,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24122,7 +24122,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24140,7 +24140,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24158,7 +24158,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24177,7 +24177,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24195,7 +24195,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24214,7 +24214,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24233,7 +24233,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24251,7 +24251,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24271,7 +24271,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24290,7 +24290,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24309,7 +24309,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24325,7 +24325,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, qmax) {
@@ -24339,7 +24339,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C16__NEON_MLAL_PADAL, strided_cm) {
@@ -24353,7 +24353,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -24369,7 +24369,7 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cn) {
@@ -24383,7 +24383,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -24399,7 +24399,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24416,7 +24416,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24432,7 +24432,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24447,7 +24447,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24465,7 +24465,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24482,7 +24482,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24500,7 +24500,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24517,7 +24517,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24535,7 +24535,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24553,7 +24553,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24571,7 +24571,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24590,7 +24590,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24608,7 +24608,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24626,7 +24626,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24645,7 +24645,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24663,7 +24663,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24682,7 +24682,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24701,7 +24701,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24719,7 +24719,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24739,7 +24739,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24758,7 +24758,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24777,7 +24777,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24793,7 +24793,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, qmax) {
@@ -24807,7 +24807,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C16__NEON_MLAL_PADAL, strided_cm) {
@@ -24821,7 +24821,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -24837,7 +24837,7 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cn) {
@@ -24851,7 +24851,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -24867,7 +24867,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -24884,7 +24884,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24900,7 +24900,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24915,7 +24915,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24933,7 +24933,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24950,7 +24950,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -24968,7 +24968,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -24985,7 +24985,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25003,7 +25003,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25021,7 +25021,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25039,7 +25039,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25058,7 +25058,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25076,7 +25076,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25094,7 +25094,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25113,7 +25113,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25131,7 +25131,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25150,7 +25150,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25169,7 +25169,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25187,7 +25187,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25207,7 +25207,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25226,7 +25226,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25245,7 +25245,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25261,7 +25261,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, qmax) {
@@ -25275,7 +25275,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C16__NEON_MLAL_PADAL, strided_cm) {
@@ -25289,7 +25289,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -25305,7 +25305,7 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cn) {
@@ -25319,7 +25319,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, k_eq_16_subtile) {
@@ -25335,7 +25335,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25352,7 +25352,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25368,7 +25368,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25383,7 +25383,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25401,7 +25401,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25418,7 +25418,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25436,7 +25436,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25453,7 +25453,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25471,7 +25471,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25489,7 +25489,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25507,7 +25507,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25526,7 +25526,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25544,7 +25544,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25562,7 +25562,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25581,7 +25581,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25599,7 +25599,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25618,7 +25618,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25637,7 +25637,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25655,7 +25655,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25675,7 +25675,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25694,7 +25694,7 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25713,7 +25713,7 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25729,7 +25729,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, qmax) {
@@ -25743,7 +25743,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C16__NEON_MLAL_PADAL, strided_cm) {
@@ -25757,7 +25757,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -25773,7 +25773,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -25787,7 +25787,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -25803,7 +25803,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25820,7 +25820,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25836,7 +25836,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25851,7 +25851,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25869,7 +25869,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25886,7 +25886,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25904,7 +25904,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25921,7 +25921,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -25939,7 +25939,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -25957,7 +25957,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25975,7 +25975,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -25994,7 +25994,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26012,7 +26012,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26030,7 +26030,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26049,7 +26049,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26067,7 +26067,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26086,7 +26086,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26105,7 +26105,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26123,7 +26123,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26143,7 +26143,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26162,7 +26162,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26181,7 +26181,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26197,7 +26197,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -26211,7 +26211,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -26225,7 +26225,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -26241,7 +26241,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -26255,7 +26255,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -26271,7 +26271,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26288,7 +26288,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26304,7 +26304,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26319,7 +26319,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26337,7 +26337,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26354,7 +26354,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26372,7 +26372,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26389,7 +26389,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26407,7 +26407,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26425,7 +26425,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26443,7 +26443,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26462,7 +26462,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26480,7 +26480,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26498,7 +26498,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26517,7 +26517,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26535,7 +26535,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26554,7 +26554,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26573,7 +26573,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26591,7 +26591,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26611,7 +26611,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26630,7 +26630,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26649,7 +26649,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26665,7 +26665,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -26679,7 +26679,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -26693,7 +26693,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -26709,7 +26709,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -26723,7 +26723,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -26739,7 +26739,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26756,7 +26756,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26772,7 +26772,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26787,7 +26787,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26805,7 +26805,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26822,7 +26822,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26840,7 +26840,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26857,7 +26857,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -26875,7 +26875,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26893,7 +26893,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26911,7 +26911,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26930,7 +26930,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -26948,7 +26948,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26966,7 +26966,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -26985,7 +26985,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27003,7 +27003,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27022,7 +27022,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27041,7 +27041,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27059,7 +27059,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27079,7 +27079,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27098,7 +27098,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27117,7 +27117,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27133,7 +27133,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -27147,7 +27147,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -27161,7 +27161,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -27177,7 +27177,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -27191,7 +27191,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -27207,7 +27207,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27224,7 +27224,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27240,7 +27240,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27255,7 +27255,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27273,7 +27273,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27290,7 +27290,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27308,7 +27308,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27325,7 +27325,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27343,7 +27343,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27361,7 +27361,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27379,7 +27379,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27398,7 +27398,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27416,7 +27416,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27434,7 +27434,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27453,7 +27453,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27471,7 +27471,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27490,7 +27490,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27509,7 +27509,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27527,7 +27527,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27547,7 +27547,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27566,7 +27566,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27585,7 +27585,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27601,7 +27601,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -27615,7 +27615,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -27629,7 +27629,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -27645,7 +27645,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -27659,7 +27659,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -27675,7 +27675,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27692,7 +27692,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27708,7 +27708,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27723,7 +27723,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27741,7 +27741,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27758,7 +27758,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27776,7 +27776,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27793,7 +27793,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27811,7 +27811,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27829,7 +27829,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27847,7 +27847,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27866,7 +27866,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27884,7 +27884,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27902,7 +27902,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27921,7 +27921,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27939,7 +27939,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -27958,7 +27958,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -27977,7 +27977,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -27995,7 +27995,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28015,7 +28015,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28034,7 +28034,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28053,7 +28053,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28069,7 +28069,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -28083,7 +28083,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -28097,7 +28097,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -28113,7 +28113,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -28127,7 +28127,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -28143,7 +28143,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28160,7 +28160,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28176,7 +28176,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28191,7 +28191,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28209,7 +28209,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28226,7 +28226,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28244,7 +28244,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28261,7 +28261,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28279,7 +28279,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28297,7 +28297,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28315,7 +28315,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28334,7 +28334,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28352,7 +28352,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28370,7 +28370,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28389,7 +28389,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28407,7 +28407,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28426,7 +28426,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28445,7 +28445,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28463,7 +28463,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28483,7 +28483,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28502,7 +28502,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28521,7 +28521,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28537,7 +28537,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -28551,7 +28551,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -28565,7 +28565,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -28581,7 +28581,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -28595,7 +28595,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -28611,7 +28611,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28628,7 +28628,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28644,7 +28644,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28659,7 +28659,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28677,7 +28677,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28694,7 +28694,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28712,7 +28712,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28729,7 +28729,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28747,7 +28747,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28765,7 +28765,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28783,7 +28783,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28802,7 +28802,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28820,7 +28820,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28838,7 +28838,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28857,7 +28857,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28875,7 +28875,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28894,7 +28894,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28913,7 +28913,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28931,7 +28931,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -28951,7 +28951,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -28970,7 +28970,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -28989,7 +28989,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29005,7 +29005,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -29019,7 +29019,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -29033,7 +29033,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -29049,7 +29049,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cn) {
@@ -29063,7 +29063,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, k_eq_8_subtile) {
@@ -29079,7 +29079,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29096,7 +29096,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29112,7 +29112,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29127,7 +29127,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29145,7 +29145,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29162,7 +29162,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29180,7 +29180,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29197,7 +29197,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29215,7 +29215,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29233,7 +29233,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29251,7 +29251,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29270,7 +29270,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29288,7 +29288,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29306,7 +29306,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29325,7 +29325,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29343,7 +29343,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29362,7 +29362,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29381,7 +29381,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29399,7 +29399,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29419,7 +29419,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29438,7 +29438,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29457,7 +29457,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29473,7 +29473,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, qmax) {
@@ -29487,7 +29487,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MULL_PADAL_DUP, strided_cm) {
@@ -29501,7 +29501,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -29517,7 +29517,7 @@
       .m(1)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -29531,7 +29531,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -29547,7 +29547,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29564,7 +29564,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29580,7 +29580,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29595,7 +29595,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29613,7 +29613,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29630,7 +29630,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29648,7 +29648,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29665,7 +29665,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29683,7 +29683,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29701,7 +29701,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29719,7 +29719,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29738,7 +29738,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29756,7 +29756,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29774,7 +29774,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29793,7 +29793,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29811,7 +29811,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29830,7 +29830,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29849,7 +29849,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29867,7 +29867,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29887,7 +29887,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -29906,7 +29906,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -29925,7 +29925,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -29941,7 +29941,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -29955,7 +29955,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -29969,7 +29969,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -29985,7 +29985,7 @@
       .m(2)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -29999,7 +29999,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -30015,7 +30015,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30032,7 +30032,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30048,7 +30048,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30063,7 +30063,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30081,7 +30081,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30098,7 +30098,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30116,7 +30116,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30133,7 +30133,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30151,7 +30151,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30169,7 +30169,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30187,7 +30187,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30206,7 +30206,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30224,7 +30224,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30242,7 +30242,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30261,7 +30261,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30279,7 +30279,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30298,7 +30298,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30317,7 +30317,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30335,7 +30335,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30355,7 +30355,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30374,7 +30374,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30393,7 +30393,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30409,7 +30409,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -30423,7 +30423,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -30437,7 +30437,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -30453,7 +30453,7 @@
       .m(3)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -30467,7 +30467,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -30483,7 +30483,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30500,7 +30500,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30516,7 +30516,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30531,7 +30531,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30549,7 +30549,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30566,7 +30566,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30584,7 +30584,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30601,7 +30601,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30619,7 +30619,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30637,7 +30637,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30655,7 +30655,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30674,7 +30674,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30692,7 +30692,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30710,7 +30710,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30729,7 +30729,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30747,7 +30747,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30766,7 +30766,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30785,7 +30785,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30803,7 +30803,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30823,7 +30823,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -30842,7 +30842,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30861,7 +30861,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30877,7 +30877,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -30891,7 +30891,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -30905,7 +30905,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -30921,7 +30921,7 @@
       .m(4)
       .n(8)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -30935,7 +30935,7 @@
       .n(8)
       .k(16)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -30951,7 +30951,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -30968,7 +30968,7 @@
         .n(8)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30984,7 +30984,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -30999,7 +30999,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31017,7 +31017,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31034,7 +31034,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31052,7 +31052,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31069,7 +31069,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31087,7 +31087,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31105,7 +31105,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31123,7 +31123,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31142,7 +31142,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31160,7 +31160,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31178,7 +31178,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31197,7 +31197,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31215,7 +31215,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31234,7 +31234,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31253,7 +31253,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31271,7 +31271,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31291,7 +31291,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31310,7 +31310,7 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31329,7 +31329,7 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31345,7 +31345,7 @@
       .n(8)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -31359,7 +31359,7 @@
       .n(8)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -31373,7 +31373,7 @@
       .n(8)
       .k(16)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -31389,7 +31389,7 @@
       .m(1)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -31403,7 +31403,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -31419,7 +31419,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31436,7 +31436,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31452,7 +31452,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31467,7 +31467,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31485,7 +31485,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31502,7 +31502,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31520,7 +31520,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31537,7 +31537,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31555,7 +31555,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31573,7 +31573,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31591,7 +31591,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31610,7 +31610,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31628,7 +31628,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31646,7 +31646,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31665,7 +31665,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31683,7 +31683,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31702,7 +31702,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31721,7 +31721,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31739,7 +31739,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31759,7 +31759,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31778,7 +31778,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31797,7 +31797,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31813,7 +31813,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -31827,7 +31827,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -31841,7 +31841,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -31857,7 +31857,7 @@
       .m(2)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -31871,7 +31871,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -31887,7 +31887,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -31904,7 +31904,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31920,7 +31920,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31935,7 +31935,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31953,7 +31953,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -31970,7 +31970,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -31988,7 +31988,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32005,7 +32005,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32023,7 +32023,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32041,7 +32041,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32059,7 +32059,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32078,7 +32078,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32096,7 +32096,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32114,7 +32114,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32133,7 +32133,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32151,7 +32151,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32170,7 +32170,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32189,7 +32189,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32207,7 +32207,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32227,7 +32227,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32246,7 +32246,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32265,7 +32265,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32281,7 +32281,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -32295,7 +32295,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -32309,7 +32309,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -32325,7 +32325,7 @@
       .m(3)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -32339,7 +32339,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -32355,7 +32355,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32372,7 +32372,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32388,7 +32388,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32403,7 +32403,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32421,7 +32421,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32438,7 +32438,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32456,7 +32456,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32473,7 +32473,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32491,7 +32491,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32509,7 +32509,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32527,7 +32527,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32546,7 +32546,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32564,7 +32564,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32582,7 +32582,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32601,7 +32601,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32619,7 +32619,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32638,7 +32638,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32657,7 +32657,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32675,7 +32675,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32695,7 +32695,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32714,7 +32714,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32733,7 +32733,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32749,7 +32749,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -32763,7 +32763,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -32777,7 +32777,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -32793,7 +32793,7 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cn) {
@@ -32807,7 +32807,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, k_eq_16_subtile) {
@@ -32823,7 +32823,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32840,7 +32840,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32856,7 +32856,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32871,7 +32871,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32889,7 +32889,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32906,7 +32906,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32924,7 +32924,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32941,7 +32941,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -32959,7 +32959,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -32977,7 +32977,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -32995,7 +32995,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33014,7 +33014,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33032,7 +33032,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33050,7 +33050,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33069,7 +33069,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33087,7 +33087,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33106,7 +33106,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33125,7 +33125,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33143,7 +33143,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33163,7 +33163,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33182,7 +33182,7 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33201,7 +33201,7 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33217,7 +33217,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, qmax) {
@@ -33231,7 +33231,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C2__NEON_MLAL_PADAL_DUP, strided_cm) {
@@ -33245,7 +33245,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -33261,7 +33261,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -33275,7 +33275,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -33291,7 +33291,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33308,7 +33308,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33324,7 +33324,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33339,7 +33339,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33357,7 +33357,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33374,7 +33374,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33392,7 +33392,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33409,7 +33409,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33427,7 +33427,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33445,7 +33445,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33463,7 +33463,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33482,7 +33482,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33500,7 +33500,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33518,7 +33518,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33537,7 +33537,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33555,7 +33555,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33574,7 +33574,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33593,7 +33593,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33611,7 +33611,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33631,7 +33631,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33650,7 +33650,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33669,7 +33669,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33685,7 +33685,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, qmax) {
@@ -33699,7 +33699,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -33713,7 +33713,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -33729,7 +33729,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -33743,7 +33743,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -33759,7 +33759,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33776,7 +33776,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33792,7 +33792,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33807,7 +33807,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33825,7 +33825,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33842,7 +33842,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33860,7 +33860,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33877,7 +33877,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -33895,7 +33895,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33913,7 +33913,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33931,7 +33931,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33950,7 +33950,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -33968,7 +33968,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -33986,7 +33986,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34005,7 +34005,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34023,7 +34023,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34042,7 +34042,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34061,7 +34061,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34079,7 +34079,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34099,7 +34099,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34118,7 +34118,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34137,7 +34137,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34153,7 +34153,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, qmax) {
@@ -34167,7 +34167,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -34181,7 +34181,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -34197,7 +34197,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -34211,7 +34211,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -34227,7 +34227,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34244,7 +34244,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34260,7 +34260,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34275,7 +34275,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34293,7 +34293,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34310,7 +34310,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34328,7 +34328,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34345,7 +34345,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34363,7 +34363,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34381,7 +34381,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34399,7 +34399,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34418,7 +34418,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34436,7 +34436,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34454,7 +34454,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34473,7 +34473,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34491,7 +34491,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34510,7 +34510,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34529,7 +34529,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34547,7 +34547,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34567,7 +34567,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34586,7 +34586,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34605,7 +34605,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34621,7 +34621,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, qmax) {
@@ -34635,7 +34635,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -34649,7 +34649,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -34665,7 +34665,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -34679,7 +34679,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -34695,7 +34695,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34712,7 +34712,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34728,7 +34728,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34743,7 +34743,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34761,7 +34761,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34778,7 +34778,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34796,7 +34796,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34813,7 +34813,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34831,7 +34831,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34849,7 +34849,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34867,7 +34867,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34886,7 +34886,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34904,7 +34904,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34922,7 +34922,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -34941,7 +34941,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34959,7 +34959,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -34978,7 +34978,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -34997,7 +34997,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35015,7 +35015,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35035,7 +35035,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35054,7 +35054,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35073,7 +35073,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35089,7 +35089,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, qmax) {
@@ -35103,7 +35103,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -35117,7 +35117,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -35133,7 +35133,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -35147,7 +35147,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -35163,7 +35163,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35180,7 +35180,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35196,7 +35196,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35211,7 +35211,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35229,7 +35229,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35246,7 +35246,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35264,7 +35264,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35281,7 +35281,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35299,7 +35299,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35317,7 +35317,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35335,7 +35335,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35354,7 +35354,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35372,7 +35372,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35390,7 +35390,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35409,7 +35409,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35427,7 +35427,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35446,7 +35446,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35465,7 +35465,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35483,7 +35483,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35503,7 +35503,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35522,7 +35522,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35541,7 +35541,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35557,7 +35557,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, qmax) {
@@ -35571,7 +35571,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -35585,7 +35585,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -35601,7 +35601,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -35615,7 +35615,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -35631,7 +35631,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35648,7 +35648,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35664,7 +35664,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35679,7 +35679,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35697,7 +35697,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35714,7 +35714,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35732,7 +35732,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35749,7 +35749,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35767,7 +35767,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35785,7 +35785,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35803,7 +35803,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35822,7 +35822,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35840,7 +35840,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35858,7 +35858,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35877,7 +35877,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35895,7 +35895,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -35914,7 +35914,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35933,7 +35933,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35951,7 +35951,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -35971,7 +35971,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -35990,7 +35990,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36009,7 +36009,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36025,7 +36025,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, qmax) {
@@ -36039,7 +36039,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -36053,7 +36053,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -36069,7 +36069,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -36083,7 +36083,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -36099,7 +36099,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36116,7 +36116,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36132,7 +36132,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36147,7 +36147,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36165,7 +36165,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36182,7 +36182,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36200,7 +36200,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36217,7 +36217,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36235,7 +36235,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36253,7 +36253,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36271,7 +36271,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36290,7 +36290,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36308,7 +36308,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36326,7 +36326,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36345,7 +36345,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36363,7 +36363,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36382,7 +36382,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36401,7 +36401,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36419,7 +36419,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36439,7 +36439,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36458,7 +36458,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36477,7 +36477,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36493,7 +36493,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, qmax) {
@@ -36507,7 +36507,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -36521,7 +36521,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -36537,7 +36537,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cn) {
@@ -36551,7 +36551,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
@@ -36567,7 +36567,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36584,7 +36584,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36600,7 +36600,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36615,7 +36615,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36633,7 +36633,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36650,7 +36650,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36668,7 +36668,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36685,7 +36685,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36703,7 +36703,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36721,7 +36721,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36739,7 +36739,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36758,7 +36758,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36776,7 +36776,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36794,7 +36794,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36813,7 +36813,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36831,7 +36831,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36850,7 +36850,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36869,7 +36869,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36887,7 +36887,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36907,7 +36907,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -36926,7 +36926,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -36945,7 +36945,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -36961,7 +36961,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, qmax) {
@@ -36975,7 +36975,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16__NEON_MULL_ADDW_DUP, strided_cm) {
@@ -36989,7 +36989,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
 
@@ -37005,7 +37005,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cn) {
@@ -37019,7 +37019,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, k_eq_8_subtile) {
@@ -37035,7 +37035,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37052,7 +37052,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37068,7 +37068,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37083,7 +37083,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37101,7 +37101,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37118,7 +37118,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37136,7 +37136,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37153,7 +37153,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37171,7 +37171,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37189,7 +37189,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37207,7 +37207,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37226,7 +37226,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37244,7 +37244,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37262,7 +37262,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37281,7 +37281,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37299,7 +37299,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37318,7 +37318,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37337,7 +37337,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37355,7 +37355,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37375,7 +37375,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37394,7 +37394,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37413,7 +37413,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37429,7 +37429,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, qmax) {
@@ -37443,7 +37443,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C4__NEONDOT, strided_cm) {
@@ -37457,7 +37457,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -37473,7 +37473,7 @@
       .m(4)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cn) {
@@ -37487,7 +37487,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, k_eq_8_subtile) {
@@ -37503,7 +37503,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37520,7 +37520,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37536,7 +37536,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37551,7 +37551,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37569,7 +37569,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37586,7 +37586,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37604,7 +37604,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37621,7 +37621,7 @@
         .m(4)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37639,7 +37639,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37657,7 +37657,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37675,7 +37675,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37694,7 +37694,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37712,7 +37712,7 @@
           .m(4)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37730,7 +37730,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37749,7 +37749,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37767,7 +37767,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37786,7 +37786,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37805,7 +37805,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37823,7 +37823,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37843,7 +37843,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -37862,7 +37862,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -37881,7 +37881,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37897,7 +37897,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, qmax) {
@@ -37911,7 +37911,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X8C4__NEONDOT, strided_cm) {
@@ -37925,7 +37925,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -37941,7 +37941,7 @@
       .m(6)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cn) {
@@ -37955,7 +37955,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, k_eq_8_subtile) {
@@ -37971,7 +37971,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -37988,7 +37988,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38004,7 +38004,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38019,7 +38019,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38037,7 +38037,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38054,7 +38054,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38072,7 +38072,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38089,7 +38089,7 @@
         .m(6)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38107,7 +38107,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38125,7 +38125,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38143,7 +38143,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38162,7 +38162,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38180,7 +38180,7 @@
           .m(6)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38198,7 +38198,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38217,7 +38217,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38235,7 +38235,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38254,7 +38254,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38273,7 +38273,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38291,7 +38291,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38311,7 +38311,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38330,7 +38330,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38349,7 +38349,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38365,7 +38365,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, qmax) {
@@ -38379,7 +38379,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X8C4__NEONDOT, strided_cm) {
@@ -38393,7 +38393,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -38409,7 +38409,7 @@
       .m(8)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cn) {
@@ -38423,7 +38423,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, k_eq_8_subtile) {
@@ -38439,7 +38439,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38456,7 +38456,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38472,7 +38472,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38487,7 +38487,7 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38505,7 +38505,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38522,7 +38522,7 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38540,7 +38540,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38557,7 +38557,7 @@
         .m(8)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38575,7 +38575,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38593,7 +38593,7 @@
           .m(8)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38611,7 +38611,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38630,7 +38630,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38648,7 +38648,7 @@
           .m(8)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38666,7 +38666,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38685,7 +38685,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38703,7 +38703,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38722,7 +38722,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38741,7 +38741,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38759,7 +38759,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38779,7 +38779,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38798,7 +38798,7 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38817,7 +38817,7 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38833,7 +38833,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, qmax) {
@@ -38847,7 +38847,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X8C4__NEONDOT, strided_cm) {
@@ -38861,7 +38861,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -38877,7 +38877,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cn) {
@@ -38891,7 +38891,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, k_eq_8_subtile) {
@@ -38907,7 +38907,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -38924,7 +38924,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38940,7 +38940,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38955,7 +38955,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -38973,7 +38973,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -38990,7 +38990,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39008,7 +39008,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39025,7 +39025,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39043,7 +39043,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39061,7 +39061,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39079,7 +39079,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39098,7 +39098,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39116,7 +39116,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39134,7 +39134,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39153,7 +39153,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39171,7 +39171,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39190,7 +39190,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39209,7 +39209,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39227,7 +39227,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39247,7 +39247,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39266,7 +39266,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39285,7 +39285,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39301,7 +39301,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, qmax) {
@@ -39315,7 +39315,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C4__NEONDOT, strided_cm) {
@@ -39329,7 +39329,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -39345,7 +39345,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cn) {
@@ -39359,7 +39359,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, k_eq_8_subtile) {
@@ -39375,7 +39375,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39392,7 +39392,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39408,7 +39408,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39423,7 +39423,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39441,7 +39441,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39458,7 +39458,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39476,7 +39476,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39493,7 +39493,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39511,7 +39511,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39529,7 +39529,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39547,7 +39547,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39566,7 +39566,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39584,7 +39584,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39602,7 +39602,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39621,7 +39621,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39639,7 +39639,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39658,7 +39658,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39677,7 +39677,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39695,7 +39695,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39715,7 +39715,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39734,7 +39734,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39753,7 +39753,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39769,7 +39769,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, qmax) {
@@ -39783,7 +39783,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__NEONDOT, strided_cm) {
@@ -39797,7 +39797,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -39813,7 +39813,7 @@
       .m(6)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cn) {
@@ -39827,7 +39827,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, k_eq_8_subtile) {
@@ -39843,7 +39843,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -39860,7 +39860,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39876,7 +39876,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39891,7 +39891,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39909,7 +39909,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39926,7 +39926,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39944,7 +39944,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39961,7 +39961,7 @@
         .m(6)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -39979,7 +39979,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -39997,7 +39997,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40015,7 +40015,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40034,7 +40034,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40052,7 +40052,7 @@
           .m(6)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40070,7 +40070,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40089,7 +40089,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40107,7 +40107,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40126,7 +40126,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40145,7 +40145,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40163,7 +40163,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40183,7 +40183,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40202,7 +40202,7 @@
         .k(k)
         .ks(3)
         .a_offset(251)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40221,7 +40221,7 @@
           .ks(3)
           .a_offset(251)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40237,7 +40237,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, qmax) {
@@ -40251,7 +40251,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_6X16C4__NEONDOT, strided_cm) {
@@ -40265,7 +40265,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -40281,7 +40281,7 @@
       .m(8)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cn) {
@@ -40295,7 +40295,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, k_eq_8_subtile) {
@@ -40311,7 +40311,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40328,7 +40328,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40344,7 +40344,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40359,7 +40359,7 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40377,7 +40377,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40394,7 +40394,7 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40412,7 +40412,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40429,7 +40429,7 @@
         .m(8)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40447,7 +40447,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40465,7 +40465,7 @@
           .m(8)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40483,7 +40483,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40502,7 +40502,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40520,7 +40520,7 @@
           .m(8)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40538,7 +40538,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40557,7 +40557,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40575,7 +40575,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40594,7 +40594,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40613,7 +40613,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40631,7 +40631,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40651,7 +40651,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40670,7 +40670,7 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40689,7 +40689,7 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40705,7 +40705,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, qmax) {
@@ -40719,7 +40719,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_8X16C4__NEONDOT, strided_cm) {
@@ -40733,7 +40733,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
 
@@ -40749,7 +40749,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cn) {
@@ -40763,7 +40763,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, k_eq_8_subtile) {
@@ -40779,7 +40779,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40796,7 +40796,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40812,7 +40812,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40827,7 +40827,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40845,7 +40845,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40862,7 +40862,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40880,7 +40880,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40897,7 +40897,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -40915,7 +40915,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40933,7 +40933,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40951,7 +40951,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -40970,7 +40970,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -40988,7 +40988,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41006,7 +41006,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41025,7 +41025,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41043,7 +41043,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41062,7 +41062,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41081,7 +41081,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41099,7 +41099,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41119,7 +41119,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41138,7 +41138,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41157,7 +41157,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41173,7 +41173,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, qmax) {
@@ -41187,7 +41187,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_LD64, strided_cm) {
@@ -41201,7 +41201,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -41217,7 +41217,7 @@
       .m(4)
       .n(16)
       .k(16)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
@@ -41231,7 +41231,7 @@
       .n(16)
       .k(16)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
@@ -41247,7 +41247,7 @@
           .n(n)
           .k(16)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41264,7 +41264,7 @@
         .n(16)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41280,7 +41280,7 @@
         .n(n)
         .k(16)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41295,7 +41295,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41313,7 +41313,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41330,7 +41330,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41348,7 +41348,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41365,7 +41365,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41383,7 +41383,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41401,7 +41401,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41419,7 +41419,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41438,7 +41438,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41456,7 +41456,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41474,7 +41474,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41493,7 +41493,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41511,7 +41511,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41530,7 +41530,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41549,7 +41549,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41567,7 +41567,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41587,7 +41587,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41606,7 +41606,7 @@
         .k(k)
         .ks(3)
         .a_offset(331)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41625,7 +41625,7 @@
           .ks(3)
           .a_offset(331)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41641,7 +41641,7 @@
       .n(16)
       .k(16)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
@@ -41655,7 +41655,7 @@
       .n(16)
       .k(16)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
@@ -41669,7 +41669,7 @@
       .n(16)
       .k(16)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_gemmlowp_neon_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_ARM64
 
@@ -41685,7 +41685,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cn) {
@@ -41699,7 +41699,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, k_eq_8_subtile) {
@@ -41715,7 +41715,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41732,7 +41732,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41748,7 +41748,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41763,7 +41763,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41781,7 +41781,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41798,7 +41798,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41816,7 +41816,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41833,7 +41833,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41851,7 +41851,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41869,7 +41869,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41887,7 +41887,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41906,7 +41906,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41924,7 +41924,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41942,7 +41942,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -41961,7 +41961,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -41979,7 +41979,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -41998,7 +41998,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42017,7 +42017,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42035,7 +42035,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42055,7 +42055,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42074,7 +42074,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42093,7 +42093,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42109,7 +42109,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, qmax) {
@@ -42123,7 +42123,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD64, strided_cm) {
@@ -42137,7 +42137,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -42153,7 +42153,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cn) {
@@ -42167,7 +42167,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, k_eq_8_subtile) {
@@ -42183,7 +42183,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42200,7 +42200,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42216,7 +42216,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42231,7 +42231,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42249,7 +42249,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42266,7 +42266,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42284,7 +42284,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42301,7 +42301,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42319,7 +42319,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42337,7 +42337,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42355,7 +42355,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42374,7 +42374,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42392,7 +42392,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42410,7 +42410,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42429,7 +42429,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42447,7 +42447,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42466,7 +42466,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42485,7 +42485,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42503,7 +42503,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42523,7 +42523,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42542,7 +42542,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42561,7 +42561,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42577,7 +42577,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, qmax) {
@@ -42591,7 +42591,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD64, strided_cm) {
@@ -42605,7 +42605,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -42621,7 +42621,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cn) {
@@ -42635,7 +42635,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, k_eq_8_subtile) {
@@ -42651,7 +42651,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42668,7 +42668,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42684,7 +42684,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42699,7 +42699,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42717,7 +42717,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42734,7 +42734,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42752,7 +42752,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42769,7 +42769,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42787,7 +42787,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42805,7 +42805,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42823,7 +42823,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42842,7 +42842,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42860,7 +42860,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42878,7 +42878,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42897,7 +42897,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42915,7 +42915,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -42934,7 +42934,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -42953,7 +42953,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42971,7 +42971,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -42991,7 +42991,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43010,7 +43010,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43029,7 +43029,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43045,7 +43045,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, qmax) {
@@ -43059,7 +43059,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD64, strided_cm) {
@@ -43073,7 +43073,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -43089,7 +43089,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cn) {
@@ -43103,7 +43103,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, k_eq_8_subtile) {
@@ -43119,7 +43119,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43136,7 +43136,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43152,7 +43152,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43167,7 +43167,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43185,7 +43185,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43202,7 +43202,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43220,7 +43220,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43237,7 +43237,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43255,7 +43255,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43273,7 +43273,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43291,7 +43291,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43310,7 +43310,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43328,7 +43328,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43346,7 +43346,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43365,7 +43365,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43383,7 +43383,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43402,7 +43402,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43421,7 +43421,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43439,7 +43439,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43459,7 +43459,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43478,7 +43478,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43497,7 +43497,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43513,7 +43513,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, qmax) {
@@ -43527,7 +43527,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD64, strided_cm) {
@@ -43541,7 +43541,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -43557,7 +43557,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cn) {
@@ -43571,7 +43571,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, k_eq_8_subtile) {
@@ -43587,7 +43587,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43604,7 +43604,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43620,7 +43620,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43635,7 +43635,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43653,7 +43653,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43670,7 +43670,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43688,7 +43688,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43705,7 +43705,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43723,7 +43723,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43741,7 +43741,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43759,7 +43759,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43778,7 +43778,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43796,7 +43796,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43814,7 +43814,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43833,7 +43833,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43851,7 +43851,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43870,7 +43870,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43889,7 +43889,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43907,7 +43907,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43927,7 +43927,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -43946,7 +43946,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -43965,7 +43965,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -43981,7 +43981,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, qmax) {
@@ -43995,7 +43995,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD64, strided_cm) {
@@ -44009,7 +44009,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -44025,7 +44025,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cn) {
@@ -44039,7 +44039,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, k_eq_8_subtile) {
@@ -44055,7 +44055,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44072,7 +44072,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44088,7 +44088,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44103,7 +44103,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44121,7 +44121,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44138,7 +44138,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44156,7 +44156,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44173,7 +44173,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44191,7 +44191,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44209,7 +44209,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44227,7 +44227,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44246,7 +44246,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44264,7 +44264,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44282,7 +44282,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44301,7 +44301,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44319,7 +44319,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44338,7 +44338,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44357,7 +44357,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44375,7 +44375,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44395,7 +44395,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44414,7 +44414,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44433,7 +44433,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44449,7 +44449,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, qmax) {
@@ -44463,7 +44463,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD64, strided_cm) {
@@ -44477,7 +44477,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -44493,7 +44493,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cn) {
@@ -44507,7 +44507,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, k_eq_8_subtile) {
@@ -44523,7 +44523,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44540,7 +44540,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44556,7 +44556,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44571,7 +44571,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44589,7 +44589,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44606,7 +44606,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44624,7 +44624,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44641,7 +44641,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44659,7 +44659,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44677,7 +44677,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44695,7 +44695,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44714,7 +44714,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44732,7 +44732,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44750,7 +44750,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44769,7 +44769,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44787,7 +44787,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44806,7 +44806,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44825,7 +44825,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44843,7 +44843,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44863,7 +44863,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -44882,7 +44882,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -44901,7 +44901,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -44917,7 +44917,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, qmax) {
@@ -44931,7 +44931,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD64, strided_cm) {
@@ -44945,7 +44945,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -44961,7 +44961,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cn) {
@@ -44975,7 +44975,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, k_eq_8_subtile) {
@@ -44991,7 +44991,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45008,7 +45008,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45024,7 +45024,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45039,7 +45039,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45057,7 +45057,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45074,7 +45074,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45092,7 +45092,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45109,7 +45109,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45127,7 +45127,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45145,7 +45145,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45163,7 +45163,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45182,7 +45182,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45200,7 +45200,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45218,7 +45218,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45237,7 +45237,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45255,7 +45255,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45274,7 +45274,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45293,7 +45293,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45311,7 +45311,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45331,7 +45331,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45350,7 +45350,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45369,7 +45369,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45385,7 +45385,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, qmax) {
@@ -45399,7 +45399,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD64, strided_cm) {
@@ -45413,7 +45413,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -45429,7 +45429,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cn) {
@@ -45443,7 +45443,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, k_eq_8_subtile) {
@@ -45459,7 +45459,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45476,7 +45476,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45492,7 +45492,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45507,7 +45507,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45525,7 +45525,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45542,7 +45542,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45560,7 +45560,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45577,7 +45577,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45595,7 +45595,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45613,7 +45613,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45631,7 +45631,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45650,7 +45650,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45668,7 +45668,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45686,7 +45686,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45705,7 +45705,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45723,7 +45723,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45742,7 +45742,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45761,7 +45761,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45779,7 +45779,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45799,7 +45799,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -45818,7 +45818,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45837,7 +45837,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45853,7 +45853,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, qmax) {
@@ -45867,7 +45867,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD64, strided_cm) {
@@ -45881,7 +45881,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -45897,7 +45897,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cn) {
@@ -45911,7 +45911,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, k_eq_8_subtile) {
@@ -45927,7 +45927,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -45944,7 +45944,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45960,7 +45960,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45975,7 +45975,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -45993,7 +45993,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46010,7 +46010,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46028,7 +46028,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46045,7 +46045,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46063,7 +46063,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46081,7 +46081,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46099,7 +46099,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46118,7 +46118,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46136,7 +46136,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46154,7 +46154,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46173,7 +46173,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46191,7 +46191,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46210,7 +46210,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46229,7 +46229,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46247,7 +46247,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46267,7 +46267,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46286,7 +46286,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46305,7 +46305,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46321,7 +46321,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, qmax) {
@@ -46335,7 +46335,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD64, strided_cm) {
@@ -46349,7 +46349,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -46365,7 +46365,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cn) {
@@ -46379,7 +46379,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, k_eq_8_subtile) {
@@ -46395,7 +46395,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46412,7 +46412,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46428,7 +46428,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46443,7 +46443,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46461,7 +46461,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46478,7 +46478,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46496,7 +46496,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46513,7 +46513,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46531,7 +46531,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46549,7 +46549,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46567,7 +46567,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46586,7 +46586,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46604,7 +46604,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46622,7 +46622,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46641,7 +46641,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46659,7 +46659,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46678,7 +46678,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46697,7 +46697,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46715,7 +46715,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46735,7 +46735,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46754,7 +46754,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46773,7 +46773,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46789,7 +46789,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, qmax) {
@@ -46803,7 +46803,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD64, strided_cm) {
@@ -46817,7 +46817,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -46833,7 +46833,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cn) {
@@ -46847,7 +46847,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, k_eq_8_subtile) {
@@ -46863,7 +46863,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -46880,7 +46880,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46896,7 +46896,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46911,7 +46911,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46929,7 +46929,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46946,7 +46946,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46964,7 +46964,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -46981,7 +46981,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -46999,7 +46999,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47017,7 +47017,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47035,7 +47035,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47054,7 +47054,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47072,7 +47072,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47090,7 +47090,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47109,7 +47109,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47127,7 +47127,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47146,7 +47146,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47165,7 +47165,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47183,7 +47183,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47203,7 +47203,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47222,7 +47222,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47241,7 +47241,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47257,7 +47257,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, qmax) {
@@ -47271,7 +47271,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD64, strided_cm) {
@@ -47285,7 +47285,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -47301,7 +47301,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cn) {
@@ -47315,7 +47315,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, k_eq_8_subtile) {
@@ -47331,7 +47331,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47348,7 +47348,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47364,7 +47364,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47379,7 +47379,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47397,7 +47397,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47414,7 +47414,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47432,7 +47432,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47449,7 +47449,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47467,7 +47467,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47485,7 +47485,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47503,7 +47503,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47522,7 +47522,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47540,7 +47540,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47558,7 +47558,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47577,7 +47577,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47595,7 +47595,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47614,7 +47614,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47633,7 +47633,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47651,7 +47651,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47671,7 +47671,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47690,7 +47690,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47709,7 +47709,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47725,7 +47725,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, qmax) {
@@ -47739,7 +47739,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD64, strided_cm) {
@@ -47753,7 +47753,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -47769,7 +47769,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cn) {
@@ -47783,7 +47783,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, k_eq_8_subtile) {
@@ -47799,7 +47799,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47816,7 +47816,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47832,7 +47832,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47847,7 +47847,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47865,7 +47865,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47882,7 +47882,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47900,7 +47900,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47917,7 +47917,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -47935,7 +47935,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -47953,7 +47953,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47971,7 +47971,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -47990,7 +47990,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48008,7 +48008,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48026,7 +48026,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48045,7 +48045,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48063,7 +48063,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48082,7 +48082,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48101,7 +48101,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48119,7 +48119,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48139,7 +48139,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48158,7 +48158,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48177,7 +48177,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48193,7 +48193,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, qmax) {
@@ -48207,7 +48207,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD64, strided_cm) {
@@ -48221,7 +48221,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -48237,7 +48237,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cn) {
@@ -48251,7 +48251,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, k_eq_8_subtile) {
@@ -48267,7 +48267,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48284,7 +48284,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48300,7 +48300,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48315,7 +48315,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48333,7 +48333,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48350,7 +48350,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48368,7 +48368,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48385,7 +48385,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48403,7 +48403,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48421,7 +48421,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48439,7 +48439,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48458,7 +48458,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48476,7 +48476,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48494,7 +48494,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48513,7 +48513,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48531,7 +48531,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48550,7 +48550,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48569,7 +48569,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48587,7 +48587,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48607,7 +48607,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48626,7 +48626,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48645,7 +48645,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48661,7 +48661,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, qmax) {
@@ -48675,7 +48675,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD64, strided_cm) {
@@ -48689,7 +48689,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -48705,7 +48705,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cn) {
@@ -48719,7 +48719,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, k_eq_8_subtile) {
@@ -48735,7 +48735,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48752,7 +48752,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48768,7 +48768,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48783,7 +48783,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48801,7 +48801,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48818,7 +48818,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48836,7 +48836,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48853,7 +48853,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -48871,7 +48871,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48889,7 +48889,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48907,7 +48907,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48926,7 +48926,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48944,7 +48944,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48962,7 +48962,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -48981,7 +48981,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -48999,7 +48999,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49018,7 +49018,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49037,7 +49037,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49055,7 +49055,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49075,7 +49075,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49094,7 +49094,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49113,7 +49113,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49129,7 +49129,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, qmax) {
@@ -49143,7 +49143,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD64, strided_cm) {
@@ -49157,7 +49157,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -49173,7 +49173,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cn) {
@@ -49187,7 +49187,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, k_eq_8_subtile) {
@@ -49203,7 +49203,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49220,7 +49220,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49236,7 +49236,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49251,7 +49251,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49269,7 +49269,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49286,7 +49286,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49304,7 +49304,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49321,7 +49321,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49339,7 +49339,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49357,7 +49357,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49375,7 +49375,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49394,7 +49394,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49412,7 +49412,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49430,7 +49430,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49449,7 +49449,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49467,7 +49467,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49486,7 +49486,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49505,7 +49505,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49523,7 +49523,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49543,7 +49543,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49562,7 +49562,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49581,7 +49581,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49597,7 +49597,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, qmax) {
@@ -49611,7 +49611,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE2_LD128, strided_cm) {
@@ -49625,7 +49625,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -49641,7 +49641,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cn) {
@@ -49655,7 +49655,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, k_eq_8_subtile) {
@@ -49671,7 +49671,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49688,7 +49688,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49704,7 +49704,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49719,7 +49719,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49737,7 +49737,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49754,7 +49754,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49772,7 +49772,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49789,7 +49789,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49807,7 +49807,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49825,7 +49825,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49843,7 +49843,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49862,7 +49862,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49880,7 +49880,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49898,7 +49898,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49917,7 +49917,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49935,7 +49935,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -49954,7 +49954,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -49973,7 +49973,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -49991,7 +49991,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50011,7 +50011,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50030,7 +50030,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50049,7 +50049,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50065,7 +50065,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, qmax) {
@@ -50079,7 +50079,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE2_LD128, strided_cm) {
@@ -50093,7 +50093,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -50109,7 +50109,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cn) {
@@ -50123,7 +50123,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, k_eq_8_subtile) {
@@ -50139,7 +50139,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50156,7 +50156,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50172,7 +50172,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50187,7 +50187,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50205,7 +50205,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50222,7 +50222,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50240,7 +50240,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50257,7 +50257,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50275,7 +50275,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50293,7 +50293,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50311,7 +50311,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50330,7 +50330,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50348,7 +50348,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50366,7 +50366,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50385,7 +50385,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50403,7 +50403,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50422,7 +50422,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50441,7 +50441,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50459,7 +50459,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50479,7 +50479,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50498,7 +50498,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50517,7 +50517,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50533,7 +50533,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, qmax) {
@@ -50547,7 +50547,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE2_LD128, strided_cm) {
@@ -50561,7 +50561,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -50577,7 +50577,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cn) {
@@ -50591,7 +50591,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, k_eq_8_subtile) {
@@ -50607,7 +50607,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50624,7 +50624,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50640,7 +50640,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50655,7 +50655,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50673,7 +50673,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50690,7 +50690,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50708,7 +50708,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50725,7 +50725,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50743,7 +50743,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50761,7 +50761,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50779,7 +50779,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50798,7 +50798,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50816,7 +50816,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50834,7 +50834,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50853,7 +50853,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50871,7 +50871,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50890,7 +50890,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50909,7 +50909,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50927,7 +50927,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -50947,7 +50947,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -50966,7 +50966,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -50985,7 +50985,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51001,7 +51001,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, qmax) {
@@ -51015,7 +51015,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE2_LD128, strided_cm) {
@@ -51029,7 +51029,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -51045,7 +51045,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cn) {
@@ -51059,7 +51059,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, k_eq_8_subtile) {
@@ -51075,7 +51075,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51092,7 +51092,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51108,7 +51108,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51123,7 +51123,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51141,7 +51141,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51158,7 +51158,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51176,7 +51176,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51193,7 +51193,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51211,7 +51211,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51229,7 +51229,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51247,7 +51247,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51266,7 +51266,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51284,7 +51284,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51302,7 +51302,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51321,7 +51321,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51339,7 +51339,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51358,7 +51358,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51377,7 +51377,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51395,7 +51395,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51415,7 +51415,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51434,7 +51434,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51453,7 +51453,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51469,7 +51469,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, qmax) {
@@ -51483,7 +51483,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSSE3_LD128, strided_cm) {
@@ -51497,7 +51497,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -51513,7 +51513,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cn) {
@@ -51527,7 +51527,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, k_eq_8_subtile) {
@@ -51543,7 +51543,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51560,7 +51560,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51576,7 +51576,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51591,7 +51591,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51609,7 +51609,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51626,7 +51626,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51644,7 +51644,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51661,7 +51661,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51679,7 +51679,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51697,7 +51697,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51715,7 +51715,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51734,7 +51734,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51752,7 +51752,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51770,7 +51770,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51789,7 +51789,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51807,7 +51807,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51826,7 +51826,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51845,7 +51845,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51863,7 +51863,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51883,7 +51883,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -51902,7 +51902,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -51921,7 +51921,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -51937,7 +51937,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, qmax) {
@@ -51951,7 +51951,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSSE3_LD128, strided_cm) {
@@ -51965,7 +51965,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -51981,7 +51981,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cn) {
@@ -51995,7 +51995,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, k_eq_8_subtile) {
@@ -52011,7 +52011,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52028,7 +52028,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52044,7 +52044,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52059,7 +52059,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52077,7 +52077,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52094,7 +52094,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52112,7 +52112,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52129,7 +52129,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52147,7 +52147,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52165,7 +52165,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52183,7 +52183,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52202,7 +52202,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52220,7 +52220,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52238,7 +52238,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52257,7 +52257,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52275,7 +52275,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52294,7 +52294,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52313,7 +52313,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52331,7 +52331,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52351,7 +52351,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52370,7 +52370,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52389,7 +52389,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52405,7 +52405,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, qmax) {
@@ -52419,7 +52419,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSSE3_LD128, strided_cm) {
@@ -52433,7 +52433,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -52449,7 +52449,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cn) {
@@ -52463,7 +52463,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, k_eq_8_subtile) {
@@ -52479,7 +52479,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52496,7 +52496,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52512,7 +52512,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52527,7 +52527,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52545,7 +52545,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52562,7 +52562,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52580,7 +52580,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52597,7 +52597,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52615,7 +52615,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52633,7 +52633,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52651,7 +52651,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52670,7 +52670,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52688,7 +52688,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52706,7 +52706,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52725,7 +52725,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52743,7 +52743,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52762,7 +52762,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52781,7 +52781,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52799,7 +52799,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52819,7 +52819,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -52838,7 +52838,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52857,7 +52857,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52873,7 +52873,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, qmax) {
@@ -52887,7 +52887,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSSE3_LD128, strided_cm) {
@@ -52901,7 +52901,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -52917,7 +52917,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cn) {
@@ -52931,7 +52931,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, k_eq_8_subtile) {
@@ -52947,7 +52947,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -52964,7 +52964,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52980,7 +52980,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -52995,7 +52995,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53013,7 +53013,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53030,7 +53030,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53048,7 +53048,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53065,7 +53065,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53083,7 +53083,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53101,7 +53101,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53119,7 +53119,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53138,7 +53138,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53156,7 +53156,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53174,7 +53174,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53193,7 +53193,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53211,7 +53211,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53230,7 +53230,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53249,7 +53249,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53267,7 +53267,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53287,7 +53287,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53306,7 +53306,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53325,7 +53325,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53341,7 +53341,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, qmax) {
@@ -53355,7 +53355,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__SSE41_LD128, strided_cm) {
@@ -53369,7 +53369,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -53385,7 +53385,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cn) {
@@ -53399,7 +53399,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, k_eq_8_subtile) {
@@ -53415,7 +53415,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53432,7 +53432,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53448,7 +53448,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53463,7 +53463,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53481,7 +53481,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53498,7 +53498,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53516,7 +53516,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53533,7 +53533,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53551,7 +53551,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53569,7 +53569,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53587,7 +53587,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53606,7 +53606,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53624,7 +53624,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53642,7 +53642,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53661,7 +53661,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53679,7 +53679,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53698,7 +53698,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53717,7 +53717,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53735,7 +53735,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53755,7 +53755,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53774,7 +53774,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53793,7 +53793,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53809,7 +53809,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, qmax) {
@@ -53823,7 +53823,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__SSE41_LD128, strided_cm) {
@@ -53837,7 +53837,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -53853,7 +53853,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cn) {
@@ -53867,7 +53867,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, k_eq_8_subtile) {
@@ -53883,7 +53883,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -53900,7 +53900,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53916,7 +53916,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53931,7 +53931,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53949,7 +53949,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -53966,7 +53966,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -53984,7 +53984,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54001,7 +54001,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54019,7 +54019,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54037,7 +54037,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54055,7 +54055,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54074,7 +54074,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54092,7 +54092,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54110,7 +54110,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54129,7 +54129,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54147,7 +54147,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54166,7 +54166,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54185,7 +54185,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54203,7 +54203,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54223,7 +54223,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54242,7 +54242,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54261,7 +54261,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54277,7 +54277,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, qmax) {
@@ -54291,7 +54291,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__SSE41_LD128, strided_cm) {
@@ -54305,7 +54305,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -54321,7 +54321,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cn) {
@@ -54335,7 +54335,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, k_eq_8_subtile) {
@@ -54351,7 +54351,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54368,7 +54368,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54384,7 +54384,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54399,7 +54399,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54417,7 +54417,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54434,7 +54434,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54452,7 +54452,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54469,7 +54469,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54487,7 +54487,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54505,7 +54505,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54523,7 +54523,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54542,7 +54542,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54560,7 +54560,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54578,7 +54578,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54597,7 +54597,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54615,7 +54615,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54634,7 +54634,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54653,7 +54653,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54671,7 +54671,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54691,7 +54691,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54710,7 +54710,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54729,7 +54729,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54745,7 +54745,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, qmax) {
@@ -54759,7 +54759,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__SSE41_LD128, strided_cm) {
@@ -54773,7 +54773,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -54789,7 +54789,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cn) {
@@ -54803,7 +54803,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, k_eq_8_subtile) {
@@ -54819,7 +54819,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54836,7 +54836,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54852,7 +54852,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54867,7 +54867,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54885,7 +54885,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54902,7 +54902,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54920,7 +54920,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54937,7 +54937,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -54955,7 +54955,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -54973,7 +54973,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -54991,7 +54991,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55010,7 +55010,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55028,7 +55028,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55046,7 +55046,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55065,7 +55065,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55083,7 +55083,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55102,7 +55102,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55121,7 +55121,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55139,7 +55139,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55159,7 +55159,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55178,7 +55178,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55197,7 +55197,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55213,7 +55213,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, qmax) {
@@ -55227,7 +55227,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__AVX_LD128, strided_cm) {
@@ -55241,7 +55241,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -55257,7 +55257,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cn) {
@@ -55271,7 +55271,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, k_eq_8_subtile) {
@@ -55287,7 +55287,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55304,7 +55304,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55320,7 +55320,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55335,7 +55335,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55353,7 +55353,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55370,7 +55370,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55388,7 +55388,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55405,7 +55405,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55423,7 +55423,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55441,7 +55441,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55459,7 +55459,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55478,7 +55478,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55496,7 +55496,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55514,7 +55514,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55533,7 +55533,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55551,7 +55551,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55570,7 +55570,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55589,7 +55589,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55607,7 +55607,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55627,7 +55627,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55646,7 +55646,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55665,7 +55665,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55681,7 +55681,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, qmax) {
@@ -55695,7 +55695,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__AVX_LD128, strided_cm) {
@@ -55709,7 +55709,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -55725,7 +55725,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cn) {
@@ -55739,7 +55739,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, k_eq_8_subtile) {
@@ -55755,7 +55755,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55772,7 +55772,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55788,7 +55788,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55803,7 +55803,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55821,7 +55821,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55838,7 +55838,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55856,7 +55856,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55873,7 +55873,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -55891,7 +55891,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55909,7 +55909,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55927,7 +55927,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55946,7 +55946,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -55964,7 +55964,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -55982,7 +55982,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56001,7 +56001,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56019,7 +56019,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56038,7 +56038,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56057,7 +56057,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56075,7 +56075,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56095,7 +56095,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56114,7 +56114,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56133,7 +56133,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56149,7 +56149,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, qmax) {
@@ -56163,7 +56163,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__AVX_LD128, strided_cm) {
@@ -56177,7 +56177,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -56193,7 +56193,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cn) {
@@ -56207,7 +56207,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, k_eq_8_subtile) {
@@ -56223,7 +56223,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56240,7 +56240,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56256,7 +56256,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56271,7 +56271,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56289,7 +56289,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56306,7 +56306,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56324,7 +56324,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56341,7 +56341,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56359,7 +56359,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56377,7 +56377,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56395,7 +56395,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56414,7 +56414,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56432,7 +56432,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56450,7 +56450,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56469,7 +56469,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56487,7 +56487,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56506,7 +56506,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56525,7 +56525,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56543,7 +56543,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56563,7 +56563,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56582,7 +56582,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56601,7 +56601,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56617,7 +56617,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, qmax) {
@@ -56631,7 +56631,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__AVX_LD128, strided_cm) {
@@ -56645,7 +56645,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -56661,7 +56661,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cn) {
@@ -56675,7 +56675,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, k_eq_8_subtile) {
@@ -56691,7 +56691,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56708,7 +56708,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56724,7 +56724,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56739,7 +56739,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56757,7 +56757,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56774,7 +56774,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56792,7 +56792,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56809,7 +56809,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56827,7 +56827,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56845,7 +56845,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56863,7 +56863,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56882,7 +56882,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56900,7 +56900,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56918,7 +56918,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -56937,7 +56937,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56955,7 +56955,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -56974,7 +56974,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -56993,7 +56993,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57011,7 +57011,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57031,7 +57031,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57050,7 +57050,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57069,7 +57069,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57085,7 +57085,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, qmax) {
@@ -57099,7 +57099,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C2__XOP_LD128, strided_cm) {
@@ -57113,7 +57113,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -57129,7 +57129,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cn) {
@@ -57143,7 +57143,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, k_eq_8_subtile) {
@@ -57159,7 +57159,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57176,7 +57176,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57192,7 +57192,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57207,7 +57207,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57225,7 +57225,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57242,7 +57242,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57260,7 +57260,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57277,7 +57277,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57295,7 +57295,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57313,7 +57313,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57331,7 +57331,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57350,7 +57350,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57368,7 +57368,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57386,7 +57386,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57405,7 +57405,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57423,7 +57423,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57442,7 +57442,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57461,7 +57461,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57479,7 +57479,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57499,7 +57499,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57518,7 +57518,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57537,7 +57537,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57553,7 +57553,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, qmax) {
@@ -57567,7 +57567,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C2__XOP_LD128, strided_cm) {
@@ -57581,7 +57581,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -57597,7 +57597,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cn) {
@@ -57611,7 +57611,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, k_eq_8_subtile) {
@@ -57627,7 +57627,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57644,7 +57644,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57660,7 +57660,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57675,7 +57675,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57693,7 +57693,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57710,7 +57710,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57728,7 +57728,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57745,7 +57745,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57763,7 +57763,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57781,7 +57781,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57799,7 +57799,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57818,7 +57818,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57836,7 +57836,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57854,7 +57854,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57873,7 +57873,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57891,7 +57891,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -57910,7 +57910,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57929,7 +57929,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57947,7 +57947,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -57967,7 +57967,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -57986,7 +57986,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58005,7 +58005,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58021,7 +58021,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, qmax) {
@@ -58035,7 +58035,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C2__XOP_LD128, strided_cm) {
@@ -58049,7 +58049,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -58065,7 +58065,7 @@
       .m(4)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cn) {
@@ -58079,7 +58079,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, k_eq_8_subtile) {
@@ -58095,7 +58095,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58112,7 +58112,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58128,7 +58128,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58143,7 +58143,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58161,7 +58161,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58178,7 +58178,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58196,7 +58196,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58213,7 +58213,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58231,7 +58231,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58249,7 +58249,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58267,7 +58267,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58286,7 +58286,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58304,7 +58304,7 @@
           .m(4)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58322,7 +58322,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58341,7 +58341,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58359,7 +58359,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58378,7 +58378,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58397,7 +58397,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58415,7 +58415,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58435,7 +58435,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58454,7 +58454,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58473,7 +58473,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58489,7 +58489,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, qmax) {
@@ -58503,7 +58503,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4C2__XOP_LD128, strided_cm) {
@@ -58517,7 +58517,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -58533,7 +58533,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cn) {
@@ -58547,7 +58547,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, k_eq_8_subtile) {
@@ -58563,7 +58563,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58580,7 +58580,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58596,7 +58596,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58611,7 +58611,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58629,7 +58629,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58646,7 +58646,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58664,7 +58664,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58681,7 +58681,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58699,7 +58699,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58717,7 +58717,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58735,7 +58735,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58754,7 +58754,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58772,7 +58772,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58790,7 +58790,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58809,7 +58809,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58827,7 +58827,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58846,7 +58846,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58865,7 +58865,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58883,7 +58883,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58903,7 +58903,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -58922,7 +58922,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -58941,7 +58941,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -58957,7 +58957,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, qmax) {
@@ -58971,7 +58971,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD64, strided_cm) {
@@ -58985,7 +58985,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -59001,7 +59001,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cn) {
@@ -59015,7 +59015,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, k_eq_8_subtile) {
@@ -59031,7 +59031,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59048,7 +59048,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59064,7 +59064,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59079,7 +59079,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59097,7 +59097,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59114,7 +59114,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59132,7 +59132,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59149,7 +59149,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59167,7 +59167,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59185,7 +59185,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59203,7 +59203,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59222,7 +59222,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59240,7 +59240,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59258,7 +59258,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59277,7 +59277,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59295,7 +59295,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59314,7 +59314,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59333,7 +59333,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59351,7 +59351,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59371,7 +59371,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59390,7 +59390,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59409,7 +59409,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59425,7 +59425,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, qmax) {
@@ -59439,7 +59439,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD64, strided_cm) {
@@ -59453,7 +59453,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -59469,7 +59469,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cn) {
@@ -59483,7 +59483,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, k_eq_8_subtile) {
@@ -59499,7 +59499,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59516,7 +59516,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59532,7 +59532,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59547,7 +59547,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59565,7 +59565,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59582,7 +59582,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59600,7 +59600,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59617,7 +59617,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59635,7 +59635,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59653,7 +59653,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59671,7 +59671,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59690,7 +59690,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59708,7 +59708,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59726,7 +59726,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59745,7 +59745,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59763,7 +59763,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59782,7 +59782,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59801,7 +59801,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59819,7 +59819,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59839,7 +59839,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -59858,7 +59858,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -59877,7 +59877,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59893,7 +59893,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, qmax) {
@@ -59907,7 +59907,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD64, strided_cm) {
@@ -59921,7 +59921,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -59937,7 +59937,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cn) {
@@ -59951,7 +59951,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, k_eq_8_subtile) {
@@ -59967,7 +59967,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -59984,7 +59984,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60000,7 +60000,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60015,7 +60015,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60033,7 +60033,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60050,7 +60050,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60068,7 +60068,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60085,7 +60085,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60103,7 +60103,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60121,7 +60121,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60139,7 +60139,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60158,7 +60158,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60176,7 +60176,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60194,7 +60194,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60213,7 +60213,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60231,7 +60231,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60250,7 +60250,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60269,7 +60269,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60287,7 +60287,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60307,7 +60307,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60326,7 +60326,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60345,7 +60345,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60361,7 +60361,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, qmax) {
@@ -60375,7 +60375,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD64, strided_cm) {
@@ -60389,7 +60389,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -60405,7 +60405,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cn) {
@@ -60419,7 +60419,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, k_eq_8_subtile) {
@@ -60435,7 +60435,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60452,7 +60452,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60468,7 +60468,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60483,7 +60483,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60501,7 +60501,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60518,7 +60518,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60536,7 +60536,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60553,7 +60553,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60571,7 +60571,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60589,7 +60589,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60607,7 +60607,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60626,7 +60626,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60644,7 +60644,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60662,7 +60662,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60681,7 +60681,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60699,7 +60699,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60718,7 +60718,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60737,7 +60737,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60755,7 +60755,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60775,7 +60775,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60794,7 +60794,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60813,7 +60813,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60829,7 +60829,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, qmax) {
@@ -60843,7 +60843,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD64, strided_cm) {
@@ -60857,7 +60857,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -60873,7 +60873,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cn) {
@@ -60887,7 +60887,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, k_eq_8_subtile) {
@@ -60903,7 +60903,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -60920,7 +60920,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60936,7 +60936,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60951,7 +60951,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -60969,7 +60969,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -60986,7 +60986,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61004,7 +61004,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61021,7 +61021,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61039,7 +61039,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61057,7 +61057,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61075,7 +61075,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61094,7 +61094,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61112,7 +61112,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61130,7 +61130,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61149,7 +61149,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61167,7 +61167,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61186,7 +61186,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61205,7 +61205,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61223,7 +61223,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61243,7 +61243,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61262,7 +61262,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61281,7 +61281,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61297,7 +61297,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, qmax) {
@@ -61311,7 +61311,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD64, strided_cm) {
@@ -61325,7 +61325,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -61341,7 +61341,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cn) {
@@ -61355,7 +61355,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, k_eq_8_subtile) {
@@ -61371,7 +61371,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61388,7 +61388,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61404,7 +61404,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61419,7 +61419,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61437,7 +61437,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61454,7 +61454,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61472,7 +61472,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61489,7 +61489,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61507,7 +61507,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61525,7 +61525,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61543,7 +61543,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61562,7 +61562,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61580,7 +61580,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61598,7 +61598,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61617,7 +61617,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61635,7 +61635,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61654,7 +61654,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61673,7 +61673,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61691,7 +61691,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61711,7 +61711,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61730,7 +61730,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61749,7 +61749,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61765,7 +61765,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, qmax) {
@@ -61779,7 +61779,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD64, strided_cm) {
@@ -61793,7 +61793,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -61809,7 +61809,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cn) {
@@ -61823,7 +61823,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, k_eq_8_subtile) {
@@ -61839,7 +61839,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -61856,7 +61856,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61872,7 +61872,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61887,7 +61887,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61905,7 +61905,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61922,7 +61922,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61940,7 +61940,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61957,7 +61957,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -61975,7 +61975,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -61993,7 +61993,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62011,7 +62011,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62030,7 +62030,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62048,7 +62048,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62066,7 +62066,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62085,7 +62085,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62103,7 +62103,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62122,7 +62122,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62141,7 +62141,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62159,7 +62159,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62179,7 +62179,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62198,7 +62198,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62217,7 +62217,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62233,7 +62233,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, qmax) {
@@ -62247,7 +62247,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD64, strided_cm) {
@@ -62261,7 +62261,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -62277,7 +62277,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cn) {
@@ -62291,7 +62291,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, k_eq_8_subtile) {
@@ -62307,7 +62307,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62324,7 +62324,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62340,7 +62340,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62355,7 +62355,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62373,7 +62373,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62390,7 +62390,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62408,7 +62408,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62425,7 +62425,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62443,7 +62443,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62461,7 +62461,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62479,7 +62479,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62498,7 +62498,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62516,7 +62516,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62534,7 +62534,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62553,7 +62553,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62571,7 +62571,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62590,7 +62590,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62609,7 +62609,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62627,7 +62627,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62647,7 +62647,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62666,7 +62666,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62685,7 +62685,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62701,7 +62701,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, qmax) {
@@ -62715,7 +62715,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD64, strided_cm) {
@@ -62729,7 +62729,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -62745,7 +62745,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cn) {
@@ -62759,7 +62759,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, k_eq_8_subtile) {
@@ -62775,7 +62775,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62792,7 +62792,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62808,7 +62808,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62823,7 +62823,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62841,7 +62841,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62858,7 +62858,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62876,7 +62876,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62893,7 +62893,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -62911,7 +62911,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62929,7 +62929,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62947,7 +62947,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -62966,7 +62966,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -62984,7 +62984,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63002,7 +63002,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63021,7 +63021,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63039,7 +63039,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63058,7 +63058,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63077,7 +63077,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63095,7 +63095,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63115,7 +63115,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63134,7 +63134,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63153,7 +63153,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63169,7 +63169,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, qmax) {
@@ -63183,7 +63183,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD64, strided_cm) {
@@ -63197,7 +63197,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -63213,7 +63213,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cn) {
@@ -63227,7 +63227,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, k_eq_8_subtile) {
@@ -63243,7 +63243,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63260,7 +63260,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63276,7 +63276,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63291,7 +63291,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63309,7 +63309,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63326,7 +63326,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63344,7 +63344,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63361,7 +63361,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63379,7 +63379,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63397,7 +63397,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63415,7 +63415,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63434,7 +63434,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63452,7 +63452,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63470,7 +63470,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63489,7 +63489,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63507,7 +63507,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63526,7 +63526,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63545,7 +63545,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63563,7 +63563,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63583,7 +63583,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63602,7 +63602,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63621,7 +63621,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63637,7 +63637,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, qmax) {
@@ -63651,7 +63651,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD64, strided_cm) {
@@ -63665,7 +63665,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -63681,7 +63681,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cn) {
@@ -63695,7 +63695,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, k_eq_8_subtile) {
@@ -63711,7 +63711,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63728,7 +63728,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63744,7 +63744,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63759,7 +63759,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63777,7 +63777,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63794,7 +63794,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63812,7 +63812,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63829,7 +63829,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63847,7 +63847,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63865,7 +63865,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63883,7 +63883,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63902,7 +63902,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63920,7 +63920,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63938,7 +63938,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -63957,7 +63957,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -63975,7 +63975,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -63994,7 +63994,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64013,7 +64013,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64031,7 +64031,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64051,7 +64051,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64070,7 +64070,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64089,7 +64089,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64105,7 +64105,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, qmax) {
@@ -64119,7 +64119,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD64, strided_cm) {
@@ -64133,7 +64133,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -64149,7 +64149,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cn) {
@@ -64163,7 +64163,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, k_eq_8_subtile) {
@@ -64179,7 +64179,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64196,7 +64196,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64212,7 +64212,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64227,7 +64227,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64245,7 +64245,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64262,7 +64262,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64280,7 +64280,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64297,7 +64297,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64315,7 +64315,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64333,7 +64333,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64351,7 +64351,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64370,7 +64370,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64388,7 +64388,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64406,7 +64406,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64425,7 +64425,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64443,7 +64443,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64462,7 +64462,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64481,7 +64481,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64499,7 +64499,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64519,7 +64519,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64538,7 +64538,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64557,7 +64557,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64573,7 +64573,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, qmax) {
@@ -64587,7 +64587,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD64, strided_cm) {
@@ -64601,7 +64601,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -64617,7 +64617,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cn) {
@@ -64631,7 +64631,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, k_eq_8_subtile) {
@@ -64647,7 +64647,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64664,7 +64664,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64680,7 +64680,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64695,7 +64695,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64713,7 +64713,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64730,7 +64730,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64748,7 +64748,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64765,7 +64765,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64783,7 +64783,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64801,7 +64801,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64819,7 +64819,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64838,7 +64838,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64856,7 +64856,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64874,7 +64874,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64893,7 +64893,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64911,7 +64911,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -64930,7 +64930,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -64949,7 +64949,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64967,7 +64967,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -64987,7 +64987,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65006,7 +65006,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65025,7 +65025,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65041,7 +65041,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, qmax) {
@@ -65055,7 +65055,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD64, strided_cm) {
@@ -65069,7 +65069,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -65085,7 +65085,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cn) {
@@ -65099,7 +65099,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, k_eq_8_subtile) {
@@ -65115,7 +65115,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65132,7 +65132,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65148,7 +65148,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65163,7 +65163,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65181,7 +65181,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65198,7 +65198,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65216,7 +65216,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65233,7 +65233,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65251,7 +65251,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65269,7 +65269,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65287,7 +65287,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65306,7 +65306,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65324,7 +65324,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65342,7 +65342,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65361,7 +65361,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65379,7 +65379,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65398,7 +65398,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65417,7 +65417,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65435,7 +65435,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65455,7 +65455,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65474,7 +65474,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65493,7 +65493,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65509,7 +65509,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, qmax) {
@@ -65523,7 +65523,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD64, strided_cm) {
@@ -65537,7 +65537,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -65553,7 +65553,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cn) {
@@ -65567,7 +65567,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, k_eq_8_subtile) {
@@ -65583,7 +65583,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65600,7 +65600,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65616,7 +65616,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65631,7 +65631,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65649,7 +65649,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65666,7 +65666,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65684,7 +65684,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65701,7 +65701,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65719,7 +65719,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65737,7 +65737,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65755,7 +65755,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65774,7 +65774,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65792,7 +65792,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65810,7 +65810,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65829,7 +65829,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65847,7 +65847,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65866,7 +65866,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65885,7 +65885,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65903,7 +65903,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65923,7 +65923,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -65942,7 +65942,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -65961,7 +65961,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -65977,7 +65977,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, qmax) {
@@ -65991,7 +65991,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE2_LD128, strided_cm) {
@@ -66005,7 +66005,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -66021,7 +66021,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cn) {
@@ -66035,7 +66035,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, k_eq_8_subtile) {
@@ -66051,7 +66051,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66068,7 +66068,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66084,7 +66084,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66099,7 +66099,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66117,7 +66117,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66134,7 +66134,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66152,7 +66152,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66169,7 +66169,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66187,7 +66187,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66205,7 +66205,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66223,7 +66223,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66242,7 +66242,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66260,7 +66260,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66278,7 +66278,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66297,7 +66297,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66315,7 +66315,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66334,7 +66334,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66353,7 +66353,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66371,7 +66371,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66391,7 +66391,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66410,7 +66410,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66429,7 +66429,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66445,7 +66445,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, qmax) {
@@ -66459,7 +66459,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE2_LD128, strided_cm) {
@@ -66473,7 +66473,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -66489,7 +66489,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cn) {
@@ -66503,7 +66503,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, k_eq_8_subtile) {
@@ -66519,7 +66519,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66536,7 +66536,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66552,7 +66552,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66567,7 +66567,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66585,7 +66585,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66602,7 +66602,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66620,7 +66620,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66637,7 +66637,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66655,7 +66655,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66673,7 +66673,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66691,7 +66691,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66710,7 +66710,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66728,7 +66728,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66746,7 +66746,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66765,7 +66765,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66783,7 +66783,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66802,7 +66802,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66821,7 +66821,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66839,7 +66839,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66859,7 +66859,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -66878,7 +66878,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -66897,7 +66897,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -66913,7 +66913,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, qmax) {
@@ -66927,7 +66927,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE2_LD128, strided_cm) {
@@ -66941,7 +66941,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -66957,7 +66957,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cn) {
@@ -66971,7 +66971,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, k_eq_8_subtile) {
@@ -66987,7 +66987,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67004,7 +67004,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67020,7 +67020,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67035,7 +67035,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67053,7 +67053,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67070,7 +67070,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67088,7 +67088,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67105,7 +67105,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67123,7 +67123,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67141,7 +67141,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67159,7 +67159,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67178,7 +67178,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67196,7 +67196,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67214,7 +67214,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67233,7 +67233,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67251,7 +67251,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67270,7 +67270,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67289,7 +67289,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67307,7 +67307,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67327,7 +67327,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67346,7 +67346,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67365,7 +67365,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67381,7 +67381,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, qmax) {
@@ -67395,7 +67395,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSSE3_LD128, strided_cm) {
@@ -67409,7 +67409,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -67425,7 +67425,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cn) {
@@ -67439,7 +67439,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, k_eq_8_subtile) {
@@ -67455,7 +67455,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67472,7 +67472,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67488,7 +67488,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67503,7 +67503,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67521,7 +67521,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67538,7 +67538,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67556,7 +67556,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67573,7 +67573,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67591,7 +67591,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67609,7 +67609,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67627,7 +67627,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67646,7 +67646,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67664,7 +67664,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67682,7 +67682,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67701,7 +67701,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67719,7 +67719,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67738,7 +67738,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67757,7 +67757,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67775,7 +67775,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67795,7 +67795,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -67814,7 +67814,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67833,7 +67833,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67849,7 +67849,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, qmax) {
@@ -67863,7 +67863,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSSE3_LD128, strided_cm) {
@@ -67877,7 +67877,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -67893,7 +67893,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cn) {
@@ -67907,7 +67907,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, k_eq_8_subtile) {
@@ -67923,7 +67923,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -67940,7 +67940,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67956,7 +67956,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67971,7 +67971,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -67989,7 +67989,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68006,7 +68006,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68024,7 +68024,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68041,7 +68041,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68059,7 +68059,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68077,7 +68077,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68095,7 +68095,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68114,7 +68114,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68132,7 +68132,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68150,7 +68150,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68169,7 +68169,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68187,7 +68187,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68206,7 +68206,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68225,7 +68225,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68243,7 +68243,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68263,7 +68263,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68282,7 +68282,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68301,7 +68301,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68317,7 +68317,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, qmax) {
@@ -68331,7 +68331,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSSE3_LD128, strided_cm) {
@@ -68345,7 +68345,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -68361,7 +68361,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cn) {
@@ -68375,7 +68375,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, k_eq_8_subtile) {
@@ -68391,7 +68391,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68408,7 +68408,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68424,7 +68424,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68439,7 +68439,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68457,7 +68457,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68474,7 +68474,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68492,7 +68492,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68509,7 +68509,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68527,7 +68527,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68545,7 +68545,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68563,7 +68563,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68582,7 +68582,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68600,7 +68600,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68618,7 +68618,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68637,7 +68637,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68655,7 +68655,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68674,7 +68674,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68693,7 +68693,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68711,7 +68711,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68731,7 +68731,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68750,7 +68750,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68769,7 +68769,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68785,7 +68785,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, qmax) {
@@ -68799,7 +68799,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__SSE41_LD128, strided_cm) {
@@ -68813,7 +68813,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -68829,7 +68829,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cn) {
@@ -68843,7 +68843,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, k_eq_8_subtile) {
@@ -68859,7 +68859,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -68876,7 +68876,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68892,7 +68892,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68907,7 +68907,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68925,7 +68925,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68942,7 +68942,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68960,7 +68960,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -68977,7 +68977,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -68995,7 +68995,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69013,7 +69013,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69031,7 +69031,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69050,7 +69050,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69068,7 +69068,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69086,7 +69086,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69105,7 +69105,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69123,7 +69123,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69142,7 +69142,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69161,7 +69161,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69179,7 +69179,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69199,7 +69199,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69218,7 +69218,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69237,7 +69237,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69253,7 +69253,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, qmax) {
@@ -69267,7 +69267,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__SSE41_LD128, strided_cm) {
@@ -69281,7 +69281,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -69297,7 +69297,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cn) {
@@ -69311,7 +69311,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, k_eq_8_subtile) {
@@ -69327,7 +69327,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69344,7 +69344,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69360,7 +69360,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69375,7 +69375,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69393,7 +69393,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69410,7 +69410,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69428,7 +69428,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69445,7 +69445,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69463,7 +69463,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69481,7 +69481,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69499,7 +69499,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69518,7 +69518,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69536,7 +69536,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69554,7 +69554,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69573,7 +69573,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69591,7 +69591,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69610,7 +69610,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69629,7 +69629,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69647,7 +69647,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69667,7 +69667,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69686,7 +69686,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69705,7 +69705,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69721,7 +69721,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, qmax) {
@@ -69735,7 +69735,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__SSE41_LD128, strided_cm) {
@@ -69749,7 +69749,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -69765,7 +69765,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cn) {
@@ -69779,7 +69779,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, k_eq_8_subtile) {
@@ -69795,7 +69795,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69812,7 +69812,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69828,7 +69828,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69843,7 +69843,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69861,7 +69861,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69878,7 +69878,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69896,7 +69896,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69913,7 +69913,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -69931,7 +69931,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -69949,7 +69949,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69967,7 +69967,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -69986,7 +69986,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70004,7 +70004,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70022,7 +70022,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70041,7 +70041,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70059,7 +70059,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70078,7 +70078,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70097,7 +70097,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70115,7 +70115,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70135,7 +70135,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70154,7 +70154,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70173,7 +70173,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70189,7 +70189,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, qmax) {
@@ -70203,7 +70203,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__AVX_LD128, strided_cm) {
@@ -70217,7 +70217,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -70233,7 +70233,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cn) {
@@ -70247,7 +70247,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, k_eq_8_subtile) {
@@ -70263,7 +70263,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70280,7 +70280,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70296,7 +70296,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70311,7 +70311,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70329,7 +70329,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70346,7 +70346,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70364,7 +70364,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70381,7 +70381,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70399,7 +70399,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70417,7 +70417,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70435,7 +70435,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70454,7 +70454,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70472,7 +70472,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70490,7 +70490,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70509,7 +70509,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70527,7 +70527,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70546,7 +70546,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70565,7 +70565,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70583,7 +70583,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70603,7 +70603,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70622,7 +70622,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70641,7 +70641,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70657,7 +70657,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, qmax) {
@@ -70671,7 +70671,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__AVX_LD128, strided_cm) {
@@ -70685,7 +70685,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -70701,7 +70701,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cn) {
@@ -70715,7 +70715,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, k_eq_8_subtile) {
@@ -70731,7 +70731,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70748,7 +70748,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70764,7 +70764,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70779,7 +70779,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70797,7 +70797,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70814,7 +70814,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70832,7 +70832,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70849,7 +70849,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -70867,7 +70867,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70885,7 +70885,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70903,7 +70903,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70922,7 +70922,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70940,7 +70940,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70958,7 +70958,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -70977,7 +70977,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -70995,7 +70995,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71014,7 +71014,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71033,7 +71033,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71051,7 +71051,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71071,7 +71071,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71090,7 +71090,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71109,7 +71109,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71125,7 +71125,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, qmax) {
@@ -71139,7 +71139,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__AVX_LD128, strided_cm) {
@@ -71153,7 +71153,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -71169,7 +71169,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cn) {
@@ -71183,7 +71183,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, k_eq_8_subtile) {
@@ -71199,7 +71199,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71216,7 +71216,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71232,7 +71232,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71247,7 +71247,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71265,7 +71265,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71282,7 +71282,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71300,7 +71300,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71317,7 +71317,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71335,7 +71335,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71353,7 +71353,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71371,7 +71371,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71390,7 +71390,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71408,7 +71408,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71426,7 +71426,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71445,7 +71445,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71463,7 +71463,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71482,7 +71482,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71501,7 +71501,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71519,7 +71519,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71539,7 +71539,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71558,7 +71558,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71577,7 +71577,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71593,7 +71593,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, qmax) {
@@ -71607,7 +71607,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__XOP_LD128, strided_cm) {
@@ -71621,7 +71621,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -71637,7 +71637,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cn) {
@@ -71651,7 +71651,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, k_eq_8_subtile) {
@@ -71667,7 +71667,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71684,7 +71684,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71700,7 +71700,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71715,7 +71715,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71733,7 +71733,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71750,7 +71750,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71768,7 +71768,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71785,7 +71785,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71803,7 +71803,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71821,7 +71821,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71839,7 +71839,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71858,7 +71858,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71876,7 +71876,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71894,7 +71894,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71913,7 +71913,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71931,7 +71931,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -71950,7 +71950,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -71969,7 +71969,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -71987,7 +71987,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72007,7 +72007,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72026,7 +72026,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72045,7 +72045,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72061,7 +72061,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, qmax) {
@@ -72075,7 +72075,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__XOP_LD128, strided_cm) {
@@ -72089,7 +72089,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -72105,7 +72105,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cn) {
@@ -72119,7 +72119,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, k_eq_8_subtile) {
@@ -72135,7 +72135,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72152,7 +72152,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72168,7 +72168,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72183,7 +72183,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72201,7 +72201,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72218,7 +72218,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72236,7 +72236,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72253,7 +72253,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72271,7 +72271,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72289,7 +72289,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72307,7 +72307,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72326,7 +72326,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72344,7 +72344,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72362,7 +72362,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72381,7 +72381,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72399,7 +72399,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72418,7 +72418,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72437,7 +72437,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72455,7 +72455,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72475,7 +72475,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72494,7 +72494,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72513,7 +72513,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72529,7 +72529,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, qmax) {
@@ -72543,7 +72543,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__XOP_LD128, strided_cm) {
@@ -72557,7 +72557,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -72573,7 +72573,7 @@
       .m(1)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cn) {
@@ -72587,7 +72587,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, k_eq_8_subtile) {
@@ -72603,7 +72603,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72620,7 +72620,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72636,7 +72636,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72651,7 +72651,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72669,7 +72669,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72686,7 +72686,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72704,7 +72704,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72721,7 +72721,7 @@
         .m(1)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72739,7 +72739,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72757,7 +72757,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72775,7 +72775,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72794,7 +72794,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72812,7 +72812,7 @@
           .m(1)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72830,7 +72830,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72849,7 +72849,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72867,7 +72867,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72886,7 +72886,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72905,7 +72905,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72923,7 +72923,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72943,7 +72943,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -72962,7 +72962,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -72981,7 +72981,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -72997,7 +72997,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, qmax) {
@@ -73011,7 +73011,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X8C8__AVX2, strided_cm) {
@@ -73025,7 +73025,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -73041,7 +73041,7 @@
       .m(2)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cn) {
@@ -73055,7 +73055,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, k_eq_8_subtile) {
@@ -73071,7 +73071,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73088,7 +73088,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73104,7 +73104,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73119,7 +73119,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73137,7 +73137,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73154,7 +73154,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73172,7 +73172,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73189,7 +73189,7 @@
         .m(2)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73207,7 +73207,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73225,7 +73225,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73243,7 +73243,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73262,7 +73262,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73280,7 +73280,7 @@
           .m(2)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73298,7 +73298,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73317,7 +73317,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73335,7 +73335,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73354,7 +73354,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73373,7 +73373,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73391,7 +73391,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73411,7 +73411,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73430,7 +73430,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73449,7 +73449,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73465,7 +73465,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, qmax) {
@@ -73479,7 +73479,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X8C8__AVX2, strided_cm) {
@@ -73493,7 +73493,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -73509,7 +73509,7 @@
       .m(3)
       .n(8)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cn) {
@@ -73523,7 +73523,7 @@
       .n(8)
       .k(8)
       .cn_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, k_eq_8_subtile) {
@@ -73539,7 +73539,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73556,7 +73556,7 @@
         .n(8)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73572,7 +73572,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73587,7 +73587,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73605,7 +73605,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73622,7 +73622,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73640,7 +73640,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73657,7 +73657,7 @@
         .m(3)
         .n(8)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73675,7 +73675,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73693,7 +73693,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73711,7 +73711,7 @@
           .n(8)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73730,7 +73730,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73748,7 +73748,7 @@
           .m(3)
           .n(8)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73766,7 +73766,7 @@
           .n(n)
           .k(k)
           .cn_stride(11)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73785,7 +73785,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73803,7 +73803,7 @@
         .n(8)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73822,7 +73822,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73841,7 +73841,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73859,7 +73859,7 @@
           .n(8)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73879,7 +73879,7 @@
             .k(k)
             .cm_stride(11)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -73898,7 +73898,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -73917,7 +73917,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -73933,7 +73933,7 @@
       .n(8)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, qmax) {
@@ -73947,7 +73947,7 @@
       .n(8)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X8C8__AVX2, strided_cm) {
@@ -73961,7 +73961,7 @@
       .n(8)
       .k(8)
       .cm_stride(11)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2, xnn_init_qs8_conv_minmax_gemmlowp_avx2_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -73977,7 +73977,7 @@
       .m(1)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cn) {
@@ -73991,7 +73991,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, k_eq_8_subtile) {
@@ -74007,7 +74007,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74024,7 +74024,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74040,7 +74040,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74055,7 +74055,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74073,7 +74073,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74090,7 +74090,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74108,7 +74108,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74125,7 +74125,7 @@
         .m(1)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74143,7 +74143,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74161,7 +74161,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74179,7 +74179,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74198,7 +74198,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74216,7 +74216,7 @@
           .m(1)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74234,7 +74234,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74253,7 +74253,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74271,7 +74271,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74290,7 +74290,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74309,7 +74309,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74327,7 +74327,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74347,7 +74347,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74366,7 +74366,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74385,7 +74385,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74401,7 +74401,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, qmax) {
@@ -74415,7 +74415,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X16C8__AVX512SKX, strided_cm) {
@@ -74429,7 +74429,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -74445,7 +74445,7 @@
       .m(2)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cn) {
@@ -74459,7 +74459,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, k_eq_8_subtile) {
@@ -74475,7 +74475,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74492,7 +74492,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74508,7 +74508,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74523,7 +74523,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74541,7 +74541,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74558,7 +74558,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74576,7 +74576,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74593,7 +74593,7 @@
         .m(2)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74611,7 +74611,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74629,7 +74629,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74647,7 +74647,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74666,7 +74666,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74684,7 +74684,7 @@
           .m(2)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74702,7 +74702,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74721,7 +74721,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74739,7 +74739,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74758,7 +74758,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74777,7 +74777,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74795,7 +74795,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74815,7 +74815,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -74834,7 +74834,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74853,7 +74853,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74869,7 +74869,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, qmax) {
@@ -74883,7 +74883,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X16C8__AVX512SKX, strided_cm) {
@@ -74897,7 +74897,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -74913,7 +74913,7 @@
       .m(3)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cn) {
@@ -74927,7 +74927,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, k_eq_8_subtile) {
@@ -74943,7 +74943,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -74960,7 +74960,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74976,7 +74976,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -74991,7 +74991,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75009,7 +75009,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75026,7 +75026,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75044,7 +75044,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75061,7 +75061,7 @@
         .m(3)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75079,7 +75079,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75097,7 +75097,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75115,7 +75115,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75134,7 +75134,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75152,7 +75152,7 @@
           .m(3)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75170,7 +75170,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75189,7 +75189,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75207,7 +75207,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75226,7 +75226,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75245,7 +75245,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75263,7 +75263,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75283,7 +75283,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75302,7 +75302,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75321,7 +75321,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75337,7 +75337,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, qmax) {
@@ -75351,7 +75351,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X16C8__AVX512SKX, strided_cm) {
@@ -75365,7 +75365,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -75381,7 +75381,7 @@
       .m(4)
       .n(16)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cn) {
@@ -75395,7 +75395,7 @@
       .n(16)
       .k(8)
       .cn_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, k_eq_8_subtile) {
@@ -75411,7 +75411,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75428,7 +75428,7 @@
         .n(16)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75444,7 +75444,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75459,7 +75459,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75477,7 +75477,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75494,7 +75494,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75512,7 +75512,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75529,7 +75529,7 @@
         .m(4)
         .n(16)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75547,7 +75547,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75565,7 +75565,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75583,7 +75583,7 @@
           .n(16)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75602,7 +75602,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75620,7 +75620,7 @@
           .m(4)
           .n(16)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75638,7 +75638,7 @@
           .n(n)
           .k(k)
           .cn_stride(19)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75657,7 +75657,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75675,7 +75675,7 @@
         .n(16)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75694,7 +75694,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75713,7 +75713,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75731,7 +75731,7 @@
           .n(16)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75751,7 +75751,7 @@
             .k(k)
             .cm_stride(19)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75770,7 +75770,7 @@
         .k(k)
         .ks(3)
         .a_offset(163)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75789,7 +75789,7 @@
           .ks(3)
           .a_offset(163)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75805,7 +75805,7 @@
       .n(16)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, qmax) {
@@ -75819,7 +75819,7 @@
       .n(16)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X16C8__AVX512SKX, strided_cm) {
@@ -75833,7 +75833,7 @@
       .n(16)
       .k(8)
       .cm_stride(19)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx, xnn_init_qs8_conv_minmax_gemmlowp_sse4_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -75848,7 +75848,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cn) {
@@ -75861,7 +75861,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
@@ -75876,7 +75876,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -75892,7 +75892,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75907,7 +75907,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75921,7 +75921,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75938,7 +75938,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75954,7 +75954,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -75971,7 +75971,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -75987,7 +75987,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76004,7 +76004,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76021,7 +76021,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76038,7 +76038,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76056,7 +76056,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76073,7 +76073,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76090,7 +76090,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76108,7 +76108,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76125,7 +76125,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76143,7 +76143,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76161,7 +76161,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76178,7 +76178,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76197,7 +76197,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76215,7 +76215,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76233,7 +76233,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76248,7 +76248,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, qmax) {
@@ -76261,7 +76261,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD64, strided_cm) {
@@ -76274,7 +76274,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -76289,7 +76289,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cn) {
@@ -76302,7 +76302,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
@@ -76317,7 +76317,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76333,7 +76333,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76348,7 +76348,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76362,7 +76362,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76379,7 +76379,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76395,7 +76395,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76412,7 +76412,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76428,7 +76428,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76445,7 +76445,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76462,7 +76462,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76479,7 +76479,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76497,7 +76497,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76514,7 +76514,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76531,7 +76531,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76549,7 +76549,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76566,7 +76566,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76584,7 +76584,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76602,7 +76602,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76619,7 +76619,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76638,7 +76638,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76656,7 +76656,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76674,7 +76674,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76689,7 +76689,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, qmax) {
@@ -76702,7 +76702,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD64, strided_cm) {
@@ -76715,7 +76715,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -76730,7 +76730,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cn) {
@@ -76743,7 +76743,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, k_eq_8_subtile) {
@@ -76758,7 +76758,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76774,7 +76774,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76789,7 +76789,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76803,7 +76803,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76820,7 +76820,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76836,7 +76836,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76853,7 +76853,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76869,7 +76869,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -76886,7 +76886,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76903,7 +76903,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76920,7 +76920,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76938,7 +76938,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -76955,7 +76955,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76972,7 +76972,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -76990,7 +76990,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77007,7 +77007,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77025,7 +77025,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77043,7 +77043,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77060,7 +77060,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77079,7 +77079,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77097,7 +77097,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77115,7 +77115,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77130,7 +77130,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, qmax) {
@@ -77143,7 +77143,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD64, strided_cm) {
@@ -77156,7 +77156,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -77171,7 +77171,7 @@
       .m(1)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cn) {
@@ -77184,7 +77184,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
@@ -77199,7 +77199,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77215,7 +77215,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77230,7 +77230,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77244,7 +77244,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77261,7 +77261,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77277,7 +77277,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77294,7 +77294,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77310,7 +77310,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77327,7 +77327,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77344,7 +77344,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77361,7 +77361,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77379,7 +77379,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77396,7 +77396,7 @@
           .m(1)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77413,7 +77413,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77431,7 +77431,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77448,7 +77448,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77466,7 +77466,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77484,7 +77484,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77501,7 +77501,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77520,7 +77520,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77538,7 +77538,7 @@
         .k(k)
         .ks(3)
         .a_offset(43)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77556,7 +77556,7 @@
           .ks(3)
           .a_offset(43)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77571,7 +77571,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, qmax) {
@@ -77584,7 +77584,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4C8__WASMSIMD_LD128, strided_cm) {
@@ -77597,7 +77597,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -77612,7 +77612,7 @@
       .m(2)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cn) {
@@ -77625,7 +77625,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
@@ -77640,7 +77640,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77656,7 +77656,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77671,7 +77671,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77685,7 +77685,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77702,7 +77702,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77718,7 +77718,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77735,7 +77735,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77751,7 +77751,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77768,7 +77768,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77785,7 +77785,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77802,7 +77802,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77820,7 +77820,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77837,7 +77837,7 @@
           .m(2)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77854,7 +77854,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77872,7 +77872,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77889,7 +77889,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77907,7 +77907,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77925,7 +77925,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77942,7 +77942,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -77961,7 +77961,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -77979,7 +77979,7 @@
         .k(k)
         .ks(3)
         .a_offset(83)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -77997,7 +77997,7 @@
           .ks(3)
           .a_offset(83)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78012,7 +78012,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, qmax) {
@@ -78025,7 +78025,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4C8__WASMSIMD_LD128, strided_cm) {
@@ -78038,7 +78038,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -78053,7 +78053,7 @@
       .m(3)
       .n(4)
       .k(8)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cn) {
@@ -78066,7 +78066,7 @@
       .n(4)
       .k(8)
       .cn_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, k_eq_8_subtile) {
@@ -78081,7 +78081,7 @@
           .n(n)
           .k(8)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78097,7 +78097,7 @@
         .n(4)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78112,7 +78112,7 @@
         .n(n)
         .k(8)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78126,7 +78126,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78143,7 +78143,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78159,7 +78159,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78176,7 +78176,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78192,7 +78192,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78209,7 +78209,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78226,7 +78226,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78243,7 +78243,7 @@
           .n(4)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78261,7 +78261,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78278,7 +78278,7 @@
           .m(3)
           .n(4)
           .k(k)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78295,7 +78295,7 @@
           .n(n)
           .k(k)
           .cn_stride(7)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78313,7 +78313,7 @@
             .n(n)
             .k(k)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78330,7 +78330,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78348,7 +78348,7 @@
             .k(k)
             .ks(3)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78366,7 +78366,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78383,7 +78383,7 @@
           .n(4)
           .k(k)
           .ks(3)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78402,7 +78402,7 @@
             .k(k)
             .cm_stride(7)
             .iterations(1)
-            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+            .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
         }
       }
     }
@@ -78420,7 +78420,7 @@
         .k(k)
         .ks(3)
         .a_offset(127)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 
@@ -78438,7 +78438,7 @@
           .ks(3)
           .a_offset(127)
           .zero_index(mz)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78453,7 +78453,7 @@
       .n(4)
       .k(8)
       .qmin(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, qmax) {
@@ -78466,7 +78466,7 @@
       .n(4)
       .k(8)
       .qmax(128)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 
   TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4C8__WASMSIMD_LD128, strided_cm) {
@@ -78479,7 +78479,7 @@
       .n(4)
       .k(8)
       .cm_stride(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128, xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 #endif  // XNN_ARCH_WASMSIMD
 
@@ -78493,7 +78493,7 @@
     .m(1)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cn) {
@@ -78506,7 +78506,7 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, k_eq_1_subtile) {
@@ -78521,7 +78521,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78537,7 +78537,7 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -78552,7 +78552,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -78566,7 +78566,7 @@
       .m(1)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -78583,7 +78583,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78600,7 +78600,7 @@
         .m(1)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78617,7 +78617,7 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78635,7 +78635,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78652,7 +78652,7 @@
         .m(1)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78669,7 +78669,7 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78687,7 +78687,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78704,7 +78704,7 @@
       .n(2)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -78722,7 +78722,7 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78740,7 +78740,7 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78757,7 +78757,7 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78776,7 +78776,7 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78794,7 +78794,7 @@
       .k(k)
       .ks(3)
       .a_offset(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -78812,7 +78812,7 @@
         .ks(3)
         .a_offset(7)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78827,7 +78827,7 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, qmax) {
@@ -78840,7 +78840,7 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X2__SCALAR, strided_cm) {
@@ -78853,7 +78853,7 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -78866,7 +78866,7 @@
     .m(2)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cn) {
@@ -78879,7 +78879,7 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, k_eq_1_subtile) {
@@ -78894,7 +78894,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78910,7 +78910,7 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -78925,7 +78925,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -78939,7 +78939,7 @@
       .m(2)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -78956,7 +78956,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -78973,7 +78973,7 @@
         .m(2)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -78990,7 +78990,7 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79008,7 +79008,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79025,7 +79025,7 @@
         .m(2)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79042,7 +79042,7 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79060,7 +79060,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79077,7 +79077,7 @@
       .n(2)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79095,7 +79095,7 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79113,7 +79113,7 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79130,7 +79130,7 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79149,7 +79149,7 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79167,7 +79167,7 @@
       .k(k)
       .ks(3)
       .a_offset(13)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79185,7 +79185,7 @@
         .ks(3)
         .a_offset(13)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79200,7 +79200,7 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, qmax) {
@@ -79213,7 +79213,7 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X2__SCALAR, strided_cm) {
@@ -79226,7 +79226,7 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -79239,7 +79239,7 @@
     .m(3)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cn) {
@@ -79252,7 +79252,7 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, k_eq_1_subtile) {
@@ -79267,7 +79267,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79283,7 +79283,7 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79298,7 +79298,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79312,7 +79312,7 @@
       .m(3)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79329,7 +79329,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79346,7 +79346,7 @@
         .m(3)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79363,7 +79363,7 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79381,7 +79381,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79398,7 +79398,7 @@
         .m(3)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79415,7 +79415,7 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79433,7 +79433,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79450,7 +79450,7 @@
       .n(2)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79468,7 +79468,7 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79486,7 +79486,7 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79503,7 +79503,7 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79522,7 +79522,7 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79540,7 +79540,7 @@
       .k(k)
       .ks(3)
       .a_offset(17)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79558,7 +79558,7 @@
         .ks(3)
         .a_offset(17)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79573,7 +79573,7 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, qmax) {
@@ -79586,7 +79586,7 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X2__SCALAR, strided_cm) {
@@ -79599,7 +79599,7 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -79612,7 +79612,7 @@
     .m(4)
     .n(2)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cn) {
@@ -79625,7 +79625,7 @@
     .n(2)
     .k(1)
     .cn_stride(5)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, k_eq_1_subtile) {
@@ -79640,7 +79640,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79656,7 +79656,7 @@
       .n(2)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79671,7 +79671,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79685,7 +79685,7 @@
       .m(4)
       .n(2)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79702,7 +79702,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79719,7 +79719,7 @@
         .m(4)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79736,7 +79736,7 @@
         .n(2)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79754,7 +79754,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79771,7 +79771,7 @@
         .m(4)
         .n(2)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79788,7 +79788,7 @@
         .n(n)
         .k(k)
         .cn_stride(5)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79806,7 +79806,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79823,7 +79823,7 @@
       .n(2)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79841,7 +79841,7 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79859,7 +79859,7 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79876,7 +79876,7 @@
         .n(2)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79895,7 +79895,7 @@
           .k(k)
           .cm_stride(5)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -79913,7 +79913,7 @@
       .k(k)
       .ks(3)
       .a_offset(23)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -79931,7 +79931,7 @@
         .ks(3)
         .a_offset(23)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -79946,7 +79946,7 @@
     .n(2)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, qmax) {
@@ -79959,7 +79959,7 @@
     .n(2)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X2__SCALAR, strided_cm) {
@@ -79972,7 +79972,7 @@
     .n(2)
     .k(1)
     .cm_stride(5)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -79985,7 +79985,7 @@
     .m(1)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cn) {
@@ -79998,7 +79998,7 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, k_eq_1_subtile) {
@@ -80013,7 +80013,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80029,7 +80029,7 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80044,7 +80044,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80058,7 +80058,7 @@
       .m(1)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80075,7 +80075,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80092,7 +80092,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80109,7 +80109,7 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80127,7 +80127,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80144,7 +80144,7 @@
         .m(1)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80161,7 +80161,7 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80179,7 +80179,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80196,7 +80196,7 @@
       .n(4)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80214,7 +80214,7 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80232,7 +80232,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80249,7 +80249,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80268,7 +80268,7 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80286,7 +80286,7 @@
       .k(k)
       .ks(3)
       .a_offset(7)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80304,7 +80304,7 @@
         .ks(3)
         .a_offset(7)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80319,7 +80319,7 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, qmax) {
@@ -80332,7 +80332,7 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_1X4__SCALAR, strided_cm) {
@@ -80345,7 +80345,7 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -80358,7 +80358,7 @@
     .m(2)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cn) {
@@ -80371,7 +80371,7 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, k_eq_1_subtile) {
@@ -80386,7 +80386,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80402,7 +80402,7 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80417,7 +80417,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80431,7 +80431,7 @@
       .m(2)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80448,7 +80448,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80465,7 +80465,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80482,7 +80482,7 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80500,7 +80500,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80517,7 +80517,7 @@
         .m(2)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80534,7 +80534,7 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80552,7 +80552,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80569,7 +80569,7 @@
       .n(4)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80587,7 +80587,7 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80605,7 +80605,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80622,7 +80622,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80641,7 +80641,7 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80659,7 +80659,7 @@
       .k(k)
       .ks(3)
       .a_offset(13)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80677,7 +80677,7 @@
         .ks(3)
         .a_offset(13)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80692,7 +80692,7 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, qmax) {
@@ -80705,7 +80705,7 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_2X4__SCALAR, strided_cm) {
@@ -80718,7 +80718,7 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -80731,7 +80731,7 @@
     .m(3)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cn) {
@@ -80744,7 +80744,7 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, k_eq_1_subtile) {
@@ -80759,7 +80759,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80775,7 +80775,7 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80790,7 +80790,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80804,7 +80804,7 @@
       .m(3)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80821,7 +80821,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80838,7 +80838,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80855,7 +80855,7 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80873,7 +80873,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80890,7 +80890,7 @@
         .m(3)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80907,7 +80907,7 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80925,7 +80925,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80942,7 +80942,7 @@
       .n(4)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -80960,7 +80960,7 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -80978,7 +80978,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -80995,7 +80995,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81014,7 +81014,7 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81032,7 +81032,7 @@
       .k(k)
       .ks(3)
       .a_offset(17)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -81050,7 +81050,7 @@
         .ks(3)
         .a_offset(17)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81065,7 +81065,7 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, qmax) {
@@ -81078,7 +81078,7 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_3X4__SCALAR, strided_cm) {
@@ -81091,7 +81091,7 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 
@@ -81104,7 +81104,7 @@
     .m(4)
     .n(4)
     .k(1)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cn) {
@@ -81117,7 +81117,7 @@
     .n(4)
     .k(1)
     .cn_stride(7)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, k_eq_1_subtile) {
@@ -81132,7 +81132,7 @@
         .n(n)
         .k(1)
         .iterations(1)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81148,7 +81148,7 @@
       .n(4)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -81163,7 +81163,7 @@
       .n(n)
       .k(1)
       .iterations(1)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -81177,7 +81177,7 @@
       .m(4)
       .n(4)
       .k(k)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -81194,7 +81194,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81211,7 +81211,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81228,7 +81228,7 @@
         .n(4)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81246,7 +81246,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81263,7 +81263,7 @@
         .m(4)
         .n(4)
         .k(k)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81280,7 +81280,7 @@
         .n(n)
         .k(k)
         .cn_stride(7)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81298,7 +81298,7 @@
           .n(n)
           .k(k)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81315,7 +81315,7 @@
       .n(4)
       .k(k)
       .ks(3)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -81333,7 +81333,7 @@
           .k(k)
           .ks(3)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81351,7 +81351,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81368,7 +81368,7 @@
         .n(4)
         .k(k)
         .ks(3)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81387,7 +81387,7 @@
           .k(k)
           .cm_stride(7)
           .iterations(1)
-          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+          .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
       }
     }
   }
@@ -81405,7 +81405,7 @@
       .k(k)
       .ks(3)
       .a_offset(23)
-      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+      .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
   }
 }
 
@@ -81423,7 +81423,7 @@
         .ks(3)
         .a_offset(23)
         .zero_index(mz)
-        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+        .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
     }
   }
 }
@@ -81438,7 +81438,7 @@
     .n(4)
     .k(1)
     .qmin(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, qmax) {
@@ -81451,7 +81451,7 @@
     .n(4)
     .k(1)
     .qmax(128)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
 
 TEST(QS8_IGEMM_MINMAX_GEMMLOWP_4X4__SCALAR, strided_cm) {
@@ -81464,5 +81464,5 @@
     .n(4)
     .k(1)
     .cm_stride(7)
-    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
+    .Test(xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar, xnn_init_qs8_conv_minmax_gemmlowp_scalar_params, xnn_init_qs8_requantization_gemmlowp_params, xnn_qs8_requantize_gemmlowp);
 }
diff --git a/test/qs8-igemm-minmax-gemmlowp.yaml b/test/qs8-igemm-minmax-gemmlowp.yaml
index 21387ba..b54a10d 100644
--- a/test/qs8-igemm-minmax-gemmlowp.yaml
+++ b/test/qs8-igemm-minmax-gemmlowp.yaml
@@ -4,530 +4,530 @@
 # LICENSE file in the root directory of this source tree.
 
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x8c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x8c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x8c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_6x16c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_8x16c4__neondot
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
-  init: xnn_init_qs8_conv_minmax_neon_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_neon_params
   k-block: 16
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128
-  init: xnn_init_qs8_conv_minmax_sse2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x8c8__avx2
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x8c8__avx2
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x8c8__avx2
-  init: xnn_init_qs8_conv_minmax_avx2_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_avx2_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx
-  init: xnn_init_qs8_conv_minmax_sse4_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_sse4_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128
-  init: xnn_init_qs8_conv_minmax_wasmsimd_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_wasmsimd_params
   k-block: 8
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x2__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x2__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x2__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x2__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_1x4__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_2x4__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_3x4__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1
 - name: xnn_qs8_igemm_minmax_gemmlowp_ukernel_4x4__scalar
-  init: xnn_init_qs8_conv_minmax_scalar_params
+  init: xnn_init_qs8_conv_minmax_gemmlowp_scalar_params
   k-block: 1